kern_jail.c revision 168699
1139804Simp/*- 246197Sphk * ---------------------------------------------------------------------------- 346197Sphk * "THE BEER-WARE LICENSE" (Revision 42): 446197Sphk * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 546197Sphk * can do whatever you want with this stuff. If we meet some day, and you think 646197Sphk * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 746197Sphk * ---------------------------------------------------------------------------- 846197Sphk */ 946155Sphk 10116182Sobrien#include <sys/cdefs.h> 11116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 168699 2007-04-13 23:54:22Z pjd $"); 12116182Sobrien 13131177Spjd#include "opt_mac.h" 14131177Spjd 1546155Sphk#include <sys/param.h> 1646155Sphk#include <sys/types.h> 1746155Sphk#include <sys/kernel.h> 1846155Sphk#include <sys/systm.h> 1946155Sphk#include <sys/errno.h> 2046155Sphk#include <sys/sysproto.h> 2146155Sphk#include <sys/malloc.h> 22164032Srwatson#include <sys/priv.h> 2346155Sphk#include <sys/proc.h> 24124882Srwatson#include <sys/taskqueue.h> 2546155Sphk#include <sys/jail.h> 2687275Srwatson#include <sys/lock.h> 2787275Srwatson#include <sys/mutex.h> 28168401Spjd#include <sys/sx.h> 29113275Smike#include <sys/namei.h> 30147185Spjd#include <sys/mount.h> 31113275Smike#include <sys/queue.h> 3246155Sphk#include <sys/socket.h> 33113275Smike#include <sys/syscallsubr.h> 3457163Srwatson#include <sys/sysctl.h> 35113275Smike#include <sys/vnode.h> 3646155Sphk#include <net/if.h> 3746155Sphk#include <netinet/in.h> 3846155Sphk 39163606Srwatson#include <security/mac/mac_framework.h> 40163606Srwatson 4146155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 4246155Sphk 4389414SarrSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 4457163Srwatson "Jail rules"); 4557163Srwatson 4657163Srwatsonint jail_set_hostname_allowed = 1; 4789414SarrSYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 4857163Srwatson &jail_set_hostname_allowed, 0, 4957163Srwatson "Processes in jail can set their hostnames"); 5057163Srwatson 5161235Srwatsonint jail_socket_unixiproute_only = 1; 5289414SarrSYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 5361235Srwatson &jail_socket_unixiproute_only, 0, 5461235Srwatson "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); 5561235Srwatson 5668024Srwatsonint jail_sysvipc_allowed = 0; 5789414SarrSYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 5868024Srwatson &jail_sysvipc_allowed, 0, 5968024Srwatson "Processes in jail can use System V IPC primitives"); 6068024Srwatson 61147185Spjdstatic int jail_enforce_statfs = 2; 62147185SpjdSYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 63147185Spjd &jail_enforce_statfs, 0, 64147185Spjd "Processes in jail cannot see all mounted file systems"); 65125804Srwatson 66128664Sbmilekicint jail_allow_raw_sockets = 0; 67128664SbmilekicSYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 68128664Sbmilekic &jail_allow_raw_sockets, 0, 69128664Sbmilekic "Prison root can create raw sockets"); 70128664Sbmilekic 71141543Scpercivaint jail_chflags_allowed = 0; 72141543ScpercivaSYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 73141543Scperciva &jail_chflags_allowed, 0, 74141543Scperciva "Processes in jail can alter system file flags"); 75141543Scperciva 76168396Spjdint jail_mount_allowed = 0; 77168396SpjdSYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, 78168396Spjd &jail_mount_allowed, 0, 79168396Spjd "Processes in jail can mount/unmount jail-friendly file systems"); 80168396Spjd 81168401Spjd/* allprison, lastprid, and prisoncount are protected by allprison_lock. */ 82113275Smikestruct prisonlist allprison; 83168401Spjdstruct sx allprison_lock; 84113275Smikeint lastprid = 0; 85113275Smikeint prisoncount = 0; 86113275Smike 87168401Spjd/* 88168401Spjd * List of jail services. Protected by allprison_lock. 89168401Spjd */ 90168401SpjdTAILQ_HEAD(prison_services_head, prison_service); 91168401Spjdstatic struct prison_services_head prison_services = 92168401Spjd TAILQ_HEAD_INITIALIZER(prison_services); 93168401Spjdstatic int prison_service_slots = 0; 94168401Spjd 95168401Spjdstruct prison_service { 96168401Spjd prison_create_t ps_create; 97168401Spjd prison_destroy_t ps_destroy; 98168401Spjd int ps_slotno; 99168401Spjd TAILQ_ENTRY(prison_service) ps_next; 100168401Spjd char ps_name[0]; 101168401Spjd}; 102168401Spjd 103113275Smikestatic void init_prison(void *); 104124882Srwatsonstatic void prison_complete(void *context, int pending); 105113275Smikestatic int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 106113275Smike 107113275Smikestatic void 108113275Smikeinit_prison(void *data __unused) 109113275Smike{ 110113275Smike 111168401Spjd sx_init(&allprison_lock, "allprison"); 112113275Smike LIST_INIT(&allprison); 113113275Smike} 114113275Smike 115113275SmikeSYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 116113275Smike 11782710Sdillon/* 118114168Smike * struct jail_args { 119114168Smike * struct jail *jail; 120114168Smike * }; 12182710Sdillon */ 12246155Sphkint 123114168Smikejail(struct thread *td, struct jail_args *uap) 12446155Sphk{ 125113275Smike struct nameidata nd; 126113275Smike struct prison *pr, *tpr; 127168401Spjd struct prison_service *psrv; 12846155Sphk struct jail j; 129113275Smike struct jail_attach_args jaa; 130150652Scsjp int vfslocked, error, tryprid; 13146155Sphk 132114168Smike error = copyin(uap->jail, &j, sizeof(j)); 13346155Sphk if (error) 13484828Sjhb return (error); 13584828Sjhb if (j.version != 0) 13684828Sjhb return (EINVAL); 13784828Sjhb 138114168Smike MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 13993818Sjhb mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 140113275Smike pr->pr_ref = 1; 141114168Smike error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 142113275Smike if (error) 143113275Smike goto e_killmtx; 144150652Scsjp NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 145150652Scsjp pr->pr_path, td); 146113275Smike error = namei(&nd); 147150652Scsjp if (error) 148113275Smike goto e_killmtx; 149150652Scsjp vfslocked = NDHASGIANT(&nd); 150113275Smike pr->pr_root = nd.ni_vp; 151113275Smike VOP_UNLOCK(nd.ni_vp, 0, td); 152113275Smike NDFREE(&nd, NDF_ONLY_PNBUF); 153150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 154114168Smike error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 15584828Sjhb if (error) 156113275Smike goto e_dropvnref; 157113275Smike pr->pr_ip = j.ip_number; 158113275Smike pr->pr_linux = NULL; 159113275Smike pr->pr_securelevel = securelevel; 160168401Spjd if (prison_service_slots == 0) 161168401Spjd pr->pr_slots = NULL; 162168401Spjd else { 163168401Spjd pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 164168401Spjd M_PRISON, M_ZERO | M_WAITOK); 165168401Spjd } 166113275Smike 167113275Smike /* Determine next pr_id and add prison to allprison list. */ 168168401Spjd sx_xlock(&allprison_lock); 169113275Smike tryprid = lastprid + 1; 170113275Smike if (tryprid == JAIL_MAX) 171113275Smike tryprid = 1; 172113275Smikenext: 173113275Smike LIST_FOREACH(tpr, &allprison, pr_list) { 174113275Smike if (tpr->pr_id == tryprid) { 175113275Smike tryprid++; 176113275Smike if (tryprid == JAIL_MAX) { 177168401Spjd sx_xunlock(&allprison_lock); 178113275Smike error = EAGAIN; 179113275Smike goto e_dropvnref; 180113275Smike } 181113275Smike goto next; 182113275Smike } 183113275Smike } 184113275Smike pr->pr_id = jaa.jid = lastprid = tryprid; 185113275Smike LIST_INSERT_HEAD(&allprison, pr, pr_list); 186113275Smike prisoncount++; 187168401Spjd sx_downgrade(&allprison_lock); 188168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 189168401Spjd psrv->ps_create(psrv, pr); 190168401Spjd } 191168401Spjd sx_sunlock(&allprison_lock); 192113275Smike 193113275Smike error = jail_attach(td, &jaa); 194113275Smike if (error) 195113275Smike goto e_dropprref; 196113275Smike mtx_lock(&pr->pr_mtx); 197113275Smike pr->pr_ref--; 198113275Smike mtx_unlock(&pr->pr_mtx); 199113275Smike td->td_retval[0] = jaa.jid; 200113275Smike return (0); 201113275Smikee_dropprref: 202168401Spjd sx_xlock(&allprison_lock); 203113275Smike LIST_REMOVE(pr, pr_list); 204113275Smike prisoncount--; 205168401Spjd sx_downgrade(&allprison_lock); 206168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 207168401Spjd psrv->ps_destroy(psrv, pr); 208168401Spjd } 209168401Spjd sx_sunlock(&allprison_lock); 210113275Smikee_dropvnref: 211150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 212113275Smike vrele(pr->pr_root); 213150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 214113275Smikee_killmtx: 215113275Smike mtx_destroy(&pr->pr_mtx); 216113275Smike FREE(pr, M_PRISON); 217113275Smike return (error); 218113275Smike} 219113275Smike 220113275Smike/* 221114168Smike * struct jail_attach_args { 222114168Smike * int jid; 223114168Smike * }; 224113275Smike */ 225113275Smikeint 226114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap) 227113275Smike{ 228113275Smike struct proc *p; 229113275Smike struct ucred *newcred, *oldcred; 230113275Smike struct prison *pr; 231150652Scsjp int vfslocked, error; 232167309Spjd 233126023Snectar /* 234126023Snectar * XXX: Note that there is a slight race here if two threads 235126023Snectar * in the same privileged process attempt to attach to two 236126023Snectar * different jails at the same time. It is important for 237126023Snectar * user processes not to do this, or they might end up with 238126023Snectar * a process root from one prison, but attached to the jail 239126023Snectar * of another. 240126023Snectar */ 241164032Srwatson error = priv_check(td, PRIV_JAIL_ATTACH); 242126023Snectar if (error) 243126023Snectar return (error); 244126023Snectar 245113275Smike p = td->td_proc; 246168401Spjd sx_slock(&allprison_lock); 247113275Smike pr = prison_find(uap->jid); 248113275Smike if (pr == NULL) { 249168401Spjd sx_sunlock(&allprison_lock); 250113275Smike return (EINVAL); 251113275Smike } 252113275Smike pr->pr_ref++; 253113275Smike mtx_unlock(&pr->pr_mtx); 254168401Spjd sx_sunlock(&allprison_lock); 255113275Smike 256150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 257113275Smike vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td); 258113275Smike if ((error = change_dir(pr->pr_root, td)) != 0) 259113275Smike goto e_unlock; 260113275Smike#ifdef MAC 261113275Smike if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root))) 262113275Smike goto e_unlock; 263113275Smike#endif 264113275Smike VOP_UNLOCK(pr->pr_root, 0, td); 265113275Smike change_root(pr->pr_root, td); 266150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 267113275Smike 26884828Sjhb newcred = crget(); 26984828Sjhb PROC_LOCK(p); 27084828Sjhb oldcred = p->p_ucred; 271113275Smike setsugid(p); 27284828Sjhb crcopy(newcred, oldcred); 273113630Sjhb newcred->cr_prison = pr; 27484828Sjhb p->p_ucred = newcred; 27584828Sjhb PROC_UNLOCK(p); 27684828Sjhb crfree(oldcred); 27746155Sphk return (0); 278113275Smikee_unlock: 279113275Smike VOP_UNLOCK(pr->pr_root, 0, td); 280150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 281113275Smike mtx_lock(&pr->pr_mtx); 282113275Smike pr->pr_ref--; 283113275Smike mtx_unlock(&pr->pr_mtx); 28446155Sphk return (error); 28546155Sphk} 28646155Sphk 287113275Smike/* 288113275Smike * Returns a locked prison instance, or NULL on failure. 289113275Smike */ 290168399Spjdstruct prison * 291113275Smikeprison_find(int prid) 292113275Smike{ 293113275Smike struct prison *pr; 294113275Smike 295168401Spjd sx_assert(&allprison_lock, SX_LOCKED); 296113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 297113275Smike if (pr->pr_id == prid) { 298113275Smike mtx_lock(&pr->pr_mtx); 299168489Spjd if (pr->pr_ref == 0) { 300168489Spjd mtx_unlock(&pr->pr_mtx); 301168489Spjd break; 302168489Spjd } 303113275Smike return (pr); 304113275Smike } 305113275Smike } 306113275Smike return (NULL); 307113275Smike} 308113275Smike 30972786Srwatsonvoid 31072786Srwatsonprison_free(struct prison *pr) 31172786Srwatson{ 31272786Srwatson 31387275Srwatson mtx_lock(&pr->pr_mtx); 31472786Srwatson pr->pr_ref--; 31572786Srwatson if (pr->pr_ref == 0) { 316168483Spjd mtx_unlock(&pr->pr_mtx); 317124882Srwatson TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 318144660Sjeff taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 31987275Srwatson return; 32072786Srwatson } 32187275Srwatson mtx_unlock(&pr->pr_mtx); 32272786Srwatson} 32372786Srwatson 324124882Srwatsonstatic void 325124882Srwatsonprison_complete(void *context, int pending) 326124882Srwatson{ 327168489Spjd struct prison_service *psrv; 328124882Srwatson struct prison *pr; 329150652Scsjp int vfslocked; 330124882Srwatson 331124882Srwatson pr = (struct prison *)context; 332124882Srwatson 333168489Spjd sx_xlock(&allprison_lock); 334168489Spjd LIST_REMOVE(pr, pr_list); 335168489Spjd prisoncount--; 336168489Spjd sx_downgrade(&allprison_lock); 337168489Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 338168489Spjd psrv->ps_destroy(psrv, pr); 339168489Spjd } 340168489Spjd sx_sunlock(&allprison_lock); 341168489Spjd 342150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 343124882Srwatson vrele(pr->pr_root); 344150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 345124882Srwatson 346124882Srwatson mtx_destroy(&pr->pr_mtx); 347124882Srwatson if (pr->pr_linux != NULL) 348124882Srwatson FREE(pr->pr_linux, M_PRISON); 349124882Srwatson FREE(pr, M_PRISON); 350124882Srwatson} 351124882Srwatson 35272786Srwatsonvoid 35372786Srwatsonprison_hold(struct prison *pr) 35472786Srwatson{ 35572786Srwatson 35687275Srwatson mtx_lock(&pr->pr_mtx); 357168489Spjd KASSERT(pr->pr_ref > 0, 358168489Spjd ("Trying to hold dead prison (id=%d).", pr->pr_id)); 35972786Srwatson pr->pr_ref++; 36087275Srwatson mtx_unlock(&pr->pr_mtx); 36172786Srwatson} 36272786Srwatson 36387275Srwatsonu_int32_t 36487275Srwatsonprison_getip(struct ucred *cred) 36587275Srwatson{ 36687275Srwatson 36787275Srwatson return (cred->cr_prison->pr_ip); 36887275Srwatson} 36987275Srwatson 37046155Sphkint 37172786Srwatsonprison_ip(struct ucred *cred, int flag, u_int32_t *ip) 37246155Sphk{ 37346155Sphk u_int32_t tmp; 37446155Sphk 37572786Srwatson if (!jailed(cred)) 37646155Sphk return (0); 377167309Spjd if (flag) 37846155Sphk tmp = *ip; 37946155Sphk else 38046155Sphk tmp = ntohl(*ip); 38146155Sphk if (tmp == INADDR_ANY) { 382167309Spjd if (flag) 38372786Srwatson *ip = cred->cr_prison->pr_ip; 38446155Sphk else 38572786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 38646155Sphk return (0); 38746155Sphk } 38881114Srwatson if (tmp == INADDR_LOOPBACK) { 38981114Srwatson if (flag) 39081114Srwatson *ip = cred->cr_prison->pr_ip; 39181114Srwatson else 39281114Srwatson *ip = htonl(cred->cr_prison->pr_ip); 39381114Srwatson return (0); 39481114Srwatson } 39572786Srwatson if (cred->cr_prison->pr_ip != tmp) 39646155Sphk return (1); 39746155Sphk return (0); 39846155Sphk} 39946155Sphk 40046155Sphkvoid 40172786Srwatsonprison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 40246155Sphk{ 40346155Sphk u_int32_t tmp; 40446155Sphk 40572786Srwatson if (!jailed(cred)) 40646155Sphk return; 40746155Sphk if (flag) 40846155Sphk tmp = *ip; 40946155Sphk else 41046155Sphk tmp = ntohl(*ip); 41181114Srwatson if (tmp == INADDR_LOOPBACK) { 41246155Sphk if (flag) 41372786Srwatson *ip = cred->cr_prison->pr_ip; 41446155Sphk else 41572786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 41646155Sphk return; 41746155Sphk } 41846155Sphk return; 41946155Sphk} 42046155Sphk 42146155Sphkint 42272786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa) 42346155Sphk{ 424114168Smike struct sockaddr_in *sai; 42546155Sphk int ok; 42646155Sphk 427114168Smike sai = (struct sockaddr_in *)sa; 42861235Srwatson if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 42961235Srwatson ok = 1; 43061235Srwatson else if (sai->sin_family != AF_INET) 43146155Sphk ok = 0; 43272786Srwatson else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 43346155Sphk ok = 1; 43446155Sphk else 43546155Sphk ok = 0; 43646155Sphk return (ok); 43746155Sphk} 43872786Srwatson 43972786Srwatson/* 44072786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 44172786Srwatson */ 44272786Srwatsonint 443114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2) 44472786Srwatson{ 44572786Srwatson 44672786Srwatson if (jailed(cred1)) { 44772786Srwatson if (!jailed(cred2)) 44872786Srwatson return (ESRCH); 44972786Srwatson if (cred2->cr_prison != cred1->cr_prison) 45072786Srwatson return (ESRCH); 45172786Srwatson } 45272786Srwatson 45372786Srwatson return (0); 45472786Srwatson} 45572786Srwatson 45672786Srwatson/* 45772786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0. 45872786Srwatson */ 45972786Srwatsonint 460114168Smikejailed(struct ucred *cred) 46172786Srwatson{ 46272786Srwatson 46372786Srwatson return (cred->cr_prison != NULL); 46472786Srwatson} 46591384Srobert 46691384Srobert/* 46791384Srobert * Return the correct hostname for the passed credential. 46891384Srobert */ 46991391Srobertvoid 470114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size) 47191384Srobert{ 47291384Srobert 47391391Srobert if (jailed(cred)) { 47491391Srobert mtx_lock(&cred->cr_prison->pr_mtx); 475105354Srobert strlcpy(buf, cred->cr_prison->pr_host, size); 47691391Srobert mtx_unlock(&cred->cr_prison->pr_mtx); 477114168Smike } else 478105354Srobert strlcpy(buf, hostname, size); 47991384Srobert} 480113275Smike 481125804Srwatson/* 482147185Spjd * Determine whether the subject represented by cred can "see" 483147185Spjd * status of a mount point. 484147185Spjd * Returns: 0 for permitted, ENOENT otherwise. 485147185Spjd * XXX: This function should be called cr_canseemount() and should be 486147185Spjd * placed in kern_prot.c. 487125804Srwatson */ 488125804Srwatsonint 489147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp) 490125804Srwatson{ 491147185Spjd struct prison *pr; 492147185Spjd struct statfs *sp; 493147185Spjd size_t len; 494125804Srwatson 495147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 496147185Spjd return (0); 497147185Spjd pr = cred->cr_prison; 498147185Spjd if (pr->pr_root->v_mount == mp) 499147185Spjd return (0); 500147185Spjd if (jail_enforce_statfs == 2) 501147185Spjd return (ENOENT); 502147185Spjd /* 503147185Spjd * If jail's chroot directory is set to "/" we should be able to see 504147185Spjd * all mount-points from inside a jail. 505147185Spjd * This is ugly check, but this is the only situation when jail's 506147185Spjd * directory ends with '/'. 507147185Spjd */ 508147185Spjd if (strcmp(pr->pr_path, "/") == 0) 509147185Spjd return (0); 510147185Spjd len = strlen(pr->pr_path); 511147185Spjd sp = &mp->mnt_stat; 512147185Spjd if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 513147185Spjd return (ENOENT); 514147185Spjd /* 515147185Spjd * Be sure that we don't have situation where jail's root directory 516147185Spjd * is "/some/path" and mount point is "/some/pathpath". 517147185Spjd */ 518147185Spjd if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 519147185Spjd return (ENOENT); 520147185Spjd return (0); 521147185Spjd} 522147185Spjd 523147185Spjdvoid 524147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 525147185Spjd{ 526147185Spjd char jpath[MAXPATHLEN]; 527147185Spjd struct prison *pr; 528147185Spjd size_t len; 529147185Spjd 530147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 531147185Spjd return; 532147185Spjd pr = cred->cr_prison; 533147185Spjd if (prison_canseemount(cred, mp) != 0) { 534147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 535147185Spjd strlcpy(sp->f_mntonname, "[restricted]", 536147185Spjd sizeof(sp->f_mntonname)); 537147185Spjd return; 538125804Srwatson } 539147185Spjd if (pr->pr_root->v_mount == mp) { 540147185Spjd /* 541147185Spjd * Clear current buffer data, so we are sure nothing from 542147185Spjd * the valid path left there. 543147185Spjd */ 544147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 545147185Spjd *sp->f_mntonname = '/'; 546147185Spjd return; 547147185Spjd } 548147185Spjd /* 549147185Spjd * If jail's chroot directory is set to "/" we should be able to see 550147185Spjd * all mount-points from inside a jail. 551147185Spjd */ 552147185Spjd if (strcmp(pr->pr_path, "/") == 0) 553147185Spjd return; 554147185Spjd len = strlen(pr->pr_path); 555147185Spjd strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 556147185Spjd /* 557147185Spjd * Clear current buffer data, so we are sure nothing from 558147185Spjd * the valid path left there. 559147185Spjd */ 560147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 561147185Spjd if (*jpath == '\0') { 562147185Spjd /* Should never happen. */ 563147185Spjd *sp->f_mntonname = '/'; 564147185Spjd } else { 565147185Spjd strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 566147185Spjd } 567125804Srwatson} 568125804Srwatson 569164032Srwatson/* 570164032Srwatson * Check with permission for a specific privilege is granted within jail. We 571164032Srwatson * have a specific list of accepted privileges; the rest are denied. 572164032Srwatson */ 573164032Srwatsonint 574164032Srwatsonprison_priv_check(struct ucred *cred, int priv) 575164032Srwatson{ 576164032Srwatson 577164032Srwatson if (!jailed(cred)) 578164032Srwatson return (0); 579164032Srwatson 580164032Srwatson switch (priv) { 581164032Srwatson 582164032Srwatson /* 583164032Srwatson * Allow ktrace privileges for root in jail. 584164032Srwatson */ 585164032Srwatson case PRIV_KTRACE: 586164032Srwatson 587166827Srwatson#if 0 588164032Srwatson /* 589164032Srwatson * Allow jailed processes to configure audit identity and 590164032Srwatson * submit audit records (login, etc). In the future we may 591164032Srwatson * want to further refine the relationship between audit and 592164032Srwatson * jail. 593164032Srwatson */ 594164032Srwatson case PRIV_AUDIT_GETAUDIT: 595164032Srwatson case PRIV_AUDIT_SETAUDIT: 596164032Srwatson case PRIV_AUDIT_SUBMIT: 597166827Srwatson#endif 598164032Srwatson 599164032Srwatson /* 600164032Srwatson * Allow jailed processes to manipulate process UNIX 601164032Srwatson * credentials in any way they see fit. 602164032Srwatson */ 603164032Srwatson case PRIV_CRED_SETUID: 604164032Srwatson case PRIV_CRED_SETEUID: 605164032Srwatson case PRIV_CRED_SETGID: 606164032Srwatson case PRIV_CRED_SETEGID: 607164032Srwatson case PRIV_CRED_SETGROUPS: 608164032Srwatson case PRIV_CRED_SETREUID: 609164032Srwatson case PRIV_CRED_SETREGID: 610164032Srwatson case PRIV_CRED_SETRESUID: 611164032Srwatson case PRIV_CRED_SETRESGID: 612164032Srwatson 613164032Srwatson /* 614164032Srwatson * Jail implements visibility constraints already, so allow 615164032Srwatson * jailed root to override uid/gid-based constraints. 616164032Srwatson */ 617164032Srwatson case PRIV_SEEOTHERGIDS: 618164032Srwatson case PRIV_SEEOTHERUIDS: 619164032Srwatson 620164032Srwatson /* 621164032Srwatson * Jail implements inter-process debugging limits already, so 622164032Srwatson * allow jailed root various debugging privileges. 623164032Srwatson */ 624164032Srwatson case PRIV_DEBUG_DIFFCRED: 625164032Srwatson case PRIV_DEBUG_SUGID: 626164032Srwatson case PRIV_DEBUG_UNPRIV: 627164032Srwatson 628164032Srwatson /* 629164032Srwatson * Allow jail to set various resource limits and login 630164032Srwatson * properties, and for now, exceed process resource limits. 631164032Srwatson */ 632164032Srwatson case PRIV_PROC_LIMIT: 633164032Srwatson case PRIV_PROC_SETLOGIN: 634164032Srwatson case PRIV_PROC_SETRLIMIT: 635164032Srwatson 636164032Srwatson /* 637164032Srwatson * System V and POSIX IPC privileges are granted in jail. 638164032Srwatson */ 639164032Srwatson case PRIV_IPC_READ: 640164032Srwatson case PRIV_IPC_WRITE: 641164032Srwatson case PRIV_IPC_ADMIN: 642164032Srwatson case PRIV_IPC_MSGSIZE: 643164032Srwatson case PRIV_MQ_ADMIN: 644164032Srwatson 645164032Srwatson /* 646164032Srwatson * Jail implements its own inter-process limits, so allow 647164032Srwatson * root processes in jail to change scheduling on other 648164032Srwatson * processes in the same jail. Likewise for signalling. 649164032Srwatson */ 650164032Srwatson case PRIV_SCHED_DIFFCRED: 651164032Srwatson case PRIV_SIGNAL_DIFFCRED: 652164032Srwatson case PRIV_SIGNAL_SUGID: 653164032Srwatson 654164032Srwatson /* 655164032Srwatson * Allow jailed processes to write to sysctls marked as jail 656164032Srwatson * writable. 657164032Srwatson */ 658164032Srwatson case PRIV_SYSCTL_WRITEJAIL: 659164032Srwatson 660164032Srwatson /* 661164032Srwatson * Allow root in jail to manage a variety of quota 662166831Srwatson * properties. These should likely be conditional on a 663166831Srwatson * configuration option. 664164032Srwatson */ 665166832Srwatson case PRIV_VFS_GETQUOTA: 666166832Srwatson case PRIV_VFS_SETQUOTA: 667164032Srwatson 668164032Srwatson /* 669164032Srwatson * Since Jail relies on chroot() to implement file system 670164032Srwatson * protections, grant many VFS privileges to root in jail. 671164032Srwatson * Be careful to exclude mount-related and NFS-related 672164032Srwatson * privileges. 673164032Srwatson */ 674164032Srwatson case PRIV_VFS_READ: 675164032Srwatson case PRIV_VFS_WRITE: 676164032Srwatson case PRIV_VFS_ADMIN: 677164032Srwatson case PRIV_VFS_EXEC: 678164032Srwatson case PRIV_VFS_LOOKUP: 679164032Srwatson case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 680164032Srwatson case PRIV_VFS_CHFLAGS_DEV: 681164032Srwatson case PRIV_VFS_CHOWN: 682164032Srwatson case PRIV_VFS_CHROOT: 683167152Spjd case PRIV_VFS_RETAINSUGID: 684164032Srwatson case PRIV_VFS_FCHROOT: 685164032Srwatson case PRIV_VFS_LINK: 686164032Srwatson case PRIV_VFS_SETGID: 687164032Srwatson case PRIV_VFS_STICKYFILE: 688164032Srwatson return (0); 689164032Srwatson 690164032Srwatson /* 691164032Srwatson * Depending on the global setting, allow privilege of 692164032Srwatson * setting system flags. 693164032Srwatson */ 694164032Srwatson case PRIV_VFS_SYSFLAGS: 695164032Srwatson if (jail_chflags_allowed) 696164032Srwatson return (0); 697164032Srwatson else 698164032Srwatson return (EPERM); 699164032Srwatson 700164032Srwatson /* 701168396Spjd * Depending on the global setting, allow privilege of 702168396Spjd * mounting/unmounting file systems. 703168396Spjd */ 704168396Spjd case PRIV_VFS_MOUNT: 705168396Spjd case PRIV_VFS_UNMOUNT: 706168396Spjd case PRIV_VFS_MOUNT_NONUSER: 707168699Spjd case PRIV_VFS_MOUNT_OWNER: 708168396Spjd if (jail_mount_allowed) 709168396Spjd return (0); 710168396Spjd else 711168396Spjd return (EPERM); 712168396Spjd 713168396Spjd /* 714168591Srwatson * Allow jailed root to bind reserved ports and reuse in-use 715168591Srwatson * ports. 716164032Srwatson */ 717164032Srwatson case PRIV_NETINET_RESERVEDPORT: 718168591Srwatson case PRIV_NETINET_REUSEPORT: 719164032Srwatson return (0); 720164032Srwatson 721164032Srwatson /* 722164032Srwatson * Conditionally allow creating raw sockets in jail. 723164032Srwatson */ 724164032Srwatson case PRIV_NETINET_RAW: 725164032Srwatson if (jail_allow_raw_sockets) 726164032Srwatson return (0); 727164032Srwatson else 728164032Srwatson return (EPERM); 729164032Srwatson 730164032Srwatson /* 731164032Srwatson * Since jail implements its own visibility limits on netstat 732164032Srwatson * sysctls, allow getcred. This allows identd to work in 733164032Srwatson * jail. 734164032Srwatson */ 735164032Srwatson case PRIV_NETINET_GETCRED: 736164032Srwatson return (0); 737164032Srwatson 738164032Srwatson default: 739164032Srwatson /* 740164032Srwatson * In all remaining cases, deny the privilege request. This 741164032Srwatson * includes almost all network privileges, many system 742164032Srwatson * configuration privileges. 743164032Srwatson */ 744164032Srwatson return (EPERM); 745164032Srwatson } 746164032Srwatson} 747164032Srwatson 748168401Spjd/* 749168401Spjd * Register jail service. Provides 'create' and 'destroy' methods. 750168401Spjd * 'create' method will be called for every existing jail and all 751168401Spjd * jails in the future as they beeing created. 752168401Spjd * 'destroy' method will be called for every jail going away and 753168401Spjd * for all existing jails at the time of service deregistration. 754168401Spjd */ 755168401Spjdstruct prison_service * 756168401Spjdprison_service_register(const char *name, prison_create_t create, 757168401Spjd prison_destroy_t destroy) 758168401Spjd{ 759168401Spjd struct prison_service *psrv, *psrv2; 760168401Spjd struct prison *pr; 761168401Spjd int reallocate = 1, slotno = 0; 762168401Spjd void **slots, **oldslots; 763168401Spjd 764168401Spjd psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 765168401Spjd M_WAITOK | M_ZERO); 766168401Spjd psrv->ps_create = create; 767168401Spjd psrv->ps_destroy = destroy; 768168401Spjd strcpy(psrv->ps_name, name); 769168401Spjd /* 770168401Spjd * Grab the allprison_lock here, so we won't miss any jail 771168401Spjd * creation/destruction. 772168401Spjd */ 773168401Spjd sx_xlock(&allprison_lock); 774168401Spjd#ifdef INVARIANTS 775168401Spjd /* 776168401Spjd * Verify if service is not already registered. 777168401Spjd */ 778168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 779168401Spjd KASSERT(strcmp(psrv2->ps_name, name) != 0, 780168401Spjd ("jail service %s already registered", name)); 781168401Spjd } 782168401Spjd#endif 783168401Spjd /* 784168401Spjd * Find free slot. When there is no existing free slot available, 785168401Spjd * allocate one at the end. 786168401Spjd */ 787168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 788168401Spjd if (psrv2->ps_slotno != slotno) { 789168401Spjd KASSERT(slotno < psrv2->ps_slotno, 790168401Spjd ("Invalid slotno (slotno=%d >= ps_slotno=%d", 791168401Spjd slotno, psrv2->ps_slotno)); 792168401Spjd /* We found free slot. */ 793168401Spjd reallocate = 0; 794168401Spjd break; 795168401Spjd } 796168401Spjd slotno++; 797168401Spjd } 798168401Spjd psrv->ps_slotno = slotno; 799168401Spjd /* 800168401Spjd * Keep the list sorted by slot number. 801168401Spjd */ 802168401Spjd if (psrv2 != NULL) { 803168401Spjd KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 804168401Spjd TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 805168401Spjd } else { 806168401Spjd KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 807168401Spjd TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 808168401Spjd } 809168401Spjd prison_service_slots++; 810168401Spjd sx_downgrade(&allprison_lock); 811168401Spjd /* 812168401Spjd * Allocate memory for new slot if we didn't found empty one. 813168401Spjd * Do not use realloc(9), because pr_slots is protected with a mutex, 814168401Spjd * so we can't sleep. 815168401Spjd */ 816168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 817168401Spjd if (reallocate) { 818168401Spjd /* First allocate memory with M_WAITOK. */ 819168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 820168401Spjd M_PRISON, M_WAITOK); 821168401Spjd /* Now grab the mutex and replace pr_slots. */ 822168401Spjd mtx_lock(&pr->pr_mtx); 823168401Spjd oldslots = pr->pr_slots; 824168401Spjd if (psrv->ps_slotno > 0) { 825168401Spjd bcopy(oldslots, slots, 826168401Spjd sizeof(*slots) * (prison_service_slots - 1)); 827168401Spjd } 828168401Spjd slots[psrv->ps_slotno] = NULL; 829168401Spjd pr->pr_slots = slots; 830168401Spjd mtx_unlock(&pr->pr_mtx); 831168401Spjd if (oldslots != NULL) 832168401Spjd free(oldslots, M_PRISON); 833168401Spjd } 834168401Spjd /* 835168401Spjd * Call 'create' method for each existing jail. 836168401Spjd */ 837168401Spjd psrv->ps_create(psrv, pr); 838168401Spjd } 839168401Spjd sx_sunlock(&allprison_lock); 840168401Spjd 841168401Spjd return (psrv); 842168401Spjd} 843168401Spjd 844168401Spjdvoid 845168401Spjdprison_service_deregister(struct prison_service *psrv) 846168401Spjd{ 847168401Spjd struct prison *pr; 848168401Spjd void **slots, **oldslots; 849168401Spjd int last = 0; 850168401Spjd 851168401Spjd sx_xlock(&allprison_lock); 852168401Spjd if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 853168401Spjd last = 1; 854168401Spjd TAILQ_REMOVE(&prison_services, psrv, ps_next); 855168401Spjd prison_service_slots--; 856168401Spjd sx_downgrade(&allprison_lock); 857168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 858168401Spjd /* 859168401Spjd * Call 'destroy' method for every currently existing jail. 860168401Spjd */ 861168401Spjd psrv->ps_destroy(psrv, pr); 862168401Spjd /* 863168401Spjd * If this is the last slot, free the memory allocated for it. 864168401Spjd */ 865168401Spjd if (last) { 866168401Spjd if (prison_service_slots == 0) 867168401Spjd slots = NULL; 868168401Spjd else { 869168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 870168401Spjd M_PRISON, M_WAITOK); 871168401Spjd } 872168401Spjd mtx_lock(&pr->pr_mtx); 873168401Spjd oldslots = pr->pr_slots; 874168401Spjd /* 875168401Spjd * We require setting slot to NULL after freeing it, 876168401Spjd * this way we can check for memory leaks here. 877168401Spjd */ 878168401Spjd KASSERT(oldslots[psrv->ps_slotno] == NULL, 879168401Spjd ("Slot %d (service %s, jailid=%d) still contains data?", 880168401Spjd psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 881168401Spjd if (psrv->ps_slotno > 0) { 882168401Spjd bcopy(oldslots, slots, 883168401Spjd sizeof(*slots) * prison_service_slots); 884168401Spjd } 885168401Spjd pr->pr_slots = slots; 886168401Spjd mtx_unlock(&pr->pr_mtx); 887168401Spjd KASSERT(oldslots != NULL, ("oldslots == NULL")); 888168401Spjd free(oldslots, M_PRISON); 889168401Spjd } 890168401Spjd } 891168401Spjd sx_sunlock(&allprison_lock); 892168401Spjd free(psrv, M_PRISON); 893168401Spjd} 894168401Spjd 895168401Spjd/* 896168401Spjd * Function sets data for the given jail in slot assigned for the given 897168401Spjd * jail service. 898168401Spjd */ 899168401Spjdvoid 900168401Spjdprison_service_data_set(struct prison_service *psrv, struct prison *pr, 901168401Spjd void *data) 902168401Spjd{ 903168401Spjd 904168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 905168401Spjd pr->pr_slots[psrv->ps_slotno] = data; 906168401Spjd} 907168401Spjd 908168401Spjd/* 909168401Spjd * Function clears slots assigned for the given jail service in the given 910168401Spjd * prison structure and returns current slot data. 911168401Spjd */ 912168401Spjdvoid * 913168401Spjdprison_service_data_del(struct prison_service *psrv, struct prison *pr) 914168401Spjd{ 915168401Spjd void *data; 916168401Spjd 917168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 918168401Spjd data = pr->pr_slots[psrv->ps_slotno]; 919168401Spjd pr->pr_slots[psrv->ps_slotno] = NULL; 920168401Spjd return (data); 921168401Spjd} 922168401Spjd 923168401Spjd/* 924168401Spjd * Function returns current data from the slot assigned to the given jail 925168401Spjd * service for the given jail. 926168401Spjd */ 927168401Spjdvoid * 928168401Spjdprison_service_data_get(struct prison_service *psrv, struct prison *pr) 929168401Spjd{ 930168401Spjd 931168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 932168401Spjd return (pr->pr_slots[psrv->ps_slotno]); 933168401Spjd} 934168401Spjd 935113275Smikestatic int 936113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS) 937113275Smike{ 938113275Smike struct xprison *xp, *sxp; 939113275Smike struct prison *pr; 940113275Smike int count, error; 941113275Smike 942127020Spjd if (jailed(req->td->td_ucred)) 943125806Srwatson return (0); 944113275Smike 945168401Spjd sx_slock(&allprison_lock); 946168401Spjd if ((count = prisoncount) == 0) { 947168401Spjd sx_sunlock(&allprison_lock); 948113275Smike return (0); 949168401Spjd } 950113275Smike 951113275Smike sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 952167309Spjd 953113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 954113275Smike xp->pr_version = XPRISON_VERSION; 955113275Smike xp->pr_id = pr->pr_id; 956168487Spjd xp->pr_ip = pr->pr_ip; 957113275Smike strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 958168487Spjd mtx_lock(&pr->pr_mtx); 959113275Smike strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 960113275Smike mtx_unlock(&pr->pr_mtx); 961113275Smike xp++; 962113275Smike } 963168401Spjd sx_sunlock(&allprison_lock); 964113275Smike 965113275Smike error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 966113275Smike free(sxp, M_TEMP); 967167354Spjd return (error); 968113275Smike} 969113275Smike 970113275SmikeSYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 971113275Smike NULL, 0, sysctl_jail_list, "S", "List of active jails"); 972126004Spjd 973126004Spjdstatic int 974126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 975126004Spjd{ 976126004Spjd int error, injail; 977126004Spjd 978126004Spjd injail = jailed(req->td->td_ucred); 979126004Spjd error = SYSCTL_OUT(req, &injail, sizeof(injail)); 980126004Spjd 981126004Spjd return (error); 982126004Spjd} 983126004SpjdSYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 984126004Spjd NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 985