kern_jail.c revision 196019
1139804Simp/*- 2185435Sbz * Copyright (c) 1999 Poul-Henning Kamp. 3185435Sbz * Copyright (c) 2008 Bjoern A. Zeeb. 4191673Sjamie * Copyright (c) 2009 James Gritton. 5185435Sbz * All rights reserved. 6190466Sjamie * 7185404Sbz * Redistribution and use in source and binary forms, with or without 8185404Sbz * modification, are permitted provided that the following conditions 9185404Sbz * are met: 10185404Sbz * 1. Redistributions of source code must retain the above copyright 11185404Sbz * notice, this list of conditions and the following disclaimer. 12185404Sbz * 2. Redistributions in binary form must reproduce the above copyright 13185404Sbz * notice, this list of conditions and the following disclaimer in the 14185404Sbz * documentation and/or other materials provided with the distribution. 15185404Sbz * 16185404Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17185404Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18185404Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19185404Sbz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20185404Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21185404Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22185404Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23185404Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24185404Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25185404Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26185404Sbz * SUCH DAMAGE. 2746197Sphk */ 2846155Sphk 29116182Sobrien#include <sys/cdefs.h> 30116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 196019 2009-08-01 19:26:27Z rwatson $"); 31116182Sobrien 32193066Sjamie#include "opt_compat.h" 33185435Sbz#include "opt_ddb.h" 34185435Sbz#include "opt_inet.h" 35185435Sbz#include "opt_inet6.h" 36131177Spjd 3746155Sphk#include <sys/param.h> 3846155Sphk#include <sys/types.h> 3946155Sphk#include <sys/kernel.h> 4046155Sphk#include <sys/systm.h> 4146155Sphk#include <sys/errno.h> 4246155Sphk#include <sys/sysproto.h> 4346155Sphk#include <sys/malloc.h> 44192895Sjamie#include <sys/osd.h> 45164032Srwatson#include <sys/priv.h> 4646155Sphk#include <sys/proc.h> 47124882Srwatson#include <sys/taskqueue.h> 48177785Skib#include <sys/fcntl.h> 4946155Sphk#include <sys/jail.h> 5087275Srwatson#include <sys/lock.h> 5187275Srwatson#include <sys/mutex.h> 52168401Spjd#include <sys/sx.h> 53193066Sjamie#include <sys/sysent.h> 54113275Smike#include <sys/namei.h> 55147185Spjd#include <sys/mount.h> 56113275Smike#include <sys/queue.h> 5746155Sphk#include <sys/socket.h> 58113275Smike#include <sys/syscallsubr.h> 5957163Srwatson#include <sys/sysctl.h> 60113275Smike#include <sys/vnode.h> 61196019Srwatson 6246155Sphk#include <net/if.h> 63196019Srwatson#include <net/vnet.h> 64196019Srwatson 6546155Sphk#include <netinet/in.h> 66196019Srwatson 67185435Sbz#ifdef DDB 68185435Sbz#include <ddb/ddb.h> 69185435Sbz#ifdef INET6 70185435Sbz#include <netinet6/in6_var.h> 71185435Sbz#endif /* INET6 */ 72185435Sbz#endif /* DDB */ 7346155Sphk 74163606Srwatson#include <security/mac/mac_framework.h> 75163606Srwatson 76195944Sjamie#define DEFAULT_HOSTUUID "00000000-0000-0000-0000-000000000000" 77195944Sjamie 7846155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 7946155Sphk 80192895Sjamie/* prison0 describes what is "real" about the system. */ 81192895Sjamiestruct prison prison0 = { 82192895Sjamie .pr_id = 0, 83192895Sjamie .pr_name = "0", 84192895Sjamie .pr_ref = 1, 85192895Sjamie .pr_uref = 1, 86192895Sjamie .pr_path = "/", 87192895Sjamie .pr_securelevel = -1, 88194762Sjamie .pr_childmax = JAIL_MAX, 89195944Sjamie .pr_hostuuid = DEFAULT_HOSTUUID, 90192895Sjamie .pr_children = LIST_HEAD_INITIALIZER(&prison0.pr_children), 91193066Sjamie .pr_flags = PR_HOST, 92192895Sjamie .pr_allow = PR_ALLOW_ALL, 93192895Sjamie}; 94192895SjamieMTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); 9557163Srwatson 96192895Sjamie/* allprison and lastprid are protected by allprison_lock. */ 97168401Spjdstruct sx allprison_lock; 98191673SjamieSX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); 99191673Sjamiestruct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); 100179881Sdelphijint lastprid = 0; 101113275Smike 102191673Sjamiestatic int do_jail_attach(struct thread *td, struct prison *pr); 103190466Sjamiestatic void prison_complete(void *context, int pending); 104191673Sjamiestatic void prison_deref(struct prison *pr, int flags); 105192895Sjamiestatic char *prison_path(struct prison *pr1, struct prison *pr2); 106192895Sjamiestatic void prison_remove_one(struct prison *pr); 107185435Sbz#ifdef INET 108190466Sjamiestatic int _prison_check_ip4(struct prison *pr, struct in_addr *ia); 109192895Sjamiestatic int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); 110185435Sbz#endif 111185435Sbz#ifdef INET6 112190466Sjamiestatic int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); 113192895Sjamiestatic int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); 114185435Sbz#endif 115113275Smike 116191673Sjamie/* Flags for prison_deref */ 117191673Sjamie#define PD_DEREF 0x01 118191673Sjamie#define PD_DEUREF 0x02 119191673Sjamie#define PD_LOCKED 0x04 120191673Sjamie#define PD_LIST_SLOCKED 0x08 121191673Sjamie#define PD_LIST_XLOCKED 0x10 122113275Smike 123192895Sjamie/* 124192895Sjamie * Parameter names corresponding to PR_* flag values 125192895Sjamie */ 126192895Sjamiestatic char *pr_flag_names[] = { 127192895Sjamie [0] = "persist", 128192895Sjamie}; 129192895Sjamie 130192895Sjamiestatic char *pr_flag_nonames[] = { 131192895Sjamie [0] = "nopersist", 132195870Sjamie}; 133195870Sjamie 134195870Sjamiestruct jailsys_flags { 135195870Sjamie const char *name; 136195870Sjamie unsigned disable; 137195870Sjamie unsigned new; 138195870Sjamie} pr_flag_jailsys[] = { 139195870Sjamie { "host", 0, PR_HOST }, 140195870Sjamie#ifdef VIMAGE 141195870Sjamie { "vnet", 0, PR_VNET }, 142195870Sjamie#endif 143192895Sjamie#ifdef INET 144195870Sjamie { "ip4", PR_IP4_USER | PR_IP4_DISABLE, PR_IP4_USER }, 145192895Sjamie#endif 146192895Sjamie#ifdef INET6 147195870Sjamie { "ip6", PR_IP6_USER | PR_IP6_DISABLE, PR_IP6_USER }, 148192895Sjamie#endif 149192895Sjamie}; 150192895Sjamie 151192895Sjamiestatic char *pr_allow_names[] = { 152192895Sjamie "allow.set_hostname", 153192895Sjamie "allow.sysvipc", 154192895Sjamie "allow.raw_sockets", 155192895Sjamie "allow.chflags", 156192895Sjamie "allow.mount", 157192895Sjamie "allow.quotas", 158192895Sjamie "allow.socket_af", 159192895Sjamie}; 160192895Sjamie 161192895Sjamiestatic char *pr_allow_nonames[] = { 162192895Sjamie "allow.noset_hostname", 163192895Sjamie "allow.nosysvipc", 164192895Sjamie "allow.noraw_sockets", 165192895Sjamie "allow.nochflags", 166192895Sjamie "allow.nomount", 167192895Sjamie "allow.noquotas", 168192895Sjamie "allow.nosocket_af", 169192895Sjamie}; 170192895Sjamie 171196002Sjamie#define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME 172196002Sjamie#define JAIL_DEFAULT_ENFORCE_STATFS 2 173192895Sjamiestatic unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; 174196002Sjamiestatic int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 175192895Sjamie#if defined(INET) || defined(INET6) 176193865Sjamiestatic unsigned jail_max_af_ips = 255; 177192895Sjamie#endif 178192895Sjamie 179192895Sjamie#ifdef INET 180185435Sbzstatic int 181185435Sbzqcmp_v4(const void *ip1, const void *ip2) 182185435Sbz{ 183185435Sbz in_addr_t iaa, iab; 184185435Sbz 185185435Sbz /* 186185435Sbz * We need to compare in HBO here to get the list sorted as expected 187185435Sbz * by the result of the code. Sorting NBO addresses gives you 188185435Sbz * interesting results. If you do not understand, do not try. 189185435Sbz */ 190185435Sbz iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 191185435Sbz iab = ntohl(((const struct in_addr *)ip2)->s_addr); 192185435Sbz 193185435Sbz /* 194185435Sbz * Do not simply return the difference of the two numbers, the int is 195185435Sbz * not wide enough. 196185435Sbz */ 197185435Sbz if (iaa > iab) 198185435Sbz return (1); 199185435Sbz else if (iaa < iab) 200185435Sbz return (-1); 201185435Sbz else 202185435Sbz return (0); 203185435Sbz} 204185435Sbz#endif 205185435Sbz 206185435Sbz#ifdef INET6 207185435Sbzstatic int 208185435Sbzqcmp_v6(const void *ip1, const void *ip2) 209185435Sbz{ 210185435Sbz const struct in6_addr *ia6a, *ia6b; 211185435Sbz int i, rc; 212185435Sbz 213185435Sbz ia6a = (const struct in6_addr *)ip1; 214185435Sbz ia6b = (const struct in6_addr *)ip2; 215185435Sbz 216185435Sbz rc = 0; 217190466Sjamie for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 218185435Sbz if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 219185435Sbz rc = 1; 220185435Sbz else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 221185435Sbz rc = -1; 222185435Sbz } 223185435Sbz return (rc); 224185435Sbz} 225185435Sbz#endif 226185435Sbz 227191673Sjamie/* 228191673Sjamie * struct jail_args { 229191673Sjamie * struct jail *jail; 230191673Sjamie * }; 231191673Sjamie */ 232191673Sjamieint 233191673Sjamiejail(struct thread *td, struct jail_args *uap) 234185435Sbz{ 235191673Sjamie uint32_t version; 236191673Sjamie int error; 237192895Sjamie struct jail j; 238185435Sbz 239191673Sjamie error = copyin(uap->jail, &version, sizeof(uint32_t)); 240191673Sjamie if (error) 241191673Sjamie return (error); 242185435Sbz 243191673Sjamie switch (version) { 244191673Sjamie case 0: 245191673Sjamie { 246191673Sjamie struct jail_v0 j0; 247185435Sbz 248192895Sjamie /* FreeBSD single IPv4 jails. */ 249192895Sjamie bzero(&j, sizeof(struct jail)); 250191673Sjamie error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 251191673Sjamie if (error) 252191673Sjamie return (error); 253192895Sjamie j.version = j0.version; 254192895Sjamie j.path = j0.path; 255192895Sjamie j.hostname = j0.hostname; 256192895Sjamie j.ip4s = j0.ip_number; 257191673Sjamie break; 258191673Sjamie } 259191673Sjamie 260191673Sjamie case 1: 261185435Sbz /* 262191673Sjamie * Version 1 was used by multi-IPv4 jail implementations 263191673Sjamie * that never made it into the official kernel. 264185435Sbz */ 265191673Sjamie return (EINVAL); 266185435Sbz 267191673Sjamie case 2: /* JAIL_API_VERSION */ 268191673Sjamie /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 269191673Sjamie error = copyin(uap->jail, &j, sizeof(struct jail)); 270191673Sjamie if (error) 271191673Sjamie return (error); 272192895Sjamie break; 273192895Sjamie 274192895Sjamie default: 275192895Sjamie /* Sci-Fi jails are not supported, sorry. */ 276192895Sjamie return (EINVAL); 277192895Sjamie } 278192895Sjamie return (kern_jail(td, &j)); 279192895Sjamie} 280192895Sjamie 281192895Sjamieint 282192895Sjamiekern_jail(struct thread *td, struct jail *j) 283192895Sjamie{ 284193865Sjamie struct iovec optiov[2 * (4 285193865Sjamie + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) 286193865Sjamie#ifdef INET 287193865Sjamie + 1 288193865Sjamie#endif 289193865Sjamie#ifdef INET6 290193865Sjamie + 1 291193865Sjamie#endif 292193865Sjamie )]; 293192895Sjamie struct uio opt; 294192895Sjamie char *u_path, *u_hostname, *u_name; 295185435Sbz#ifdef INET 296193865Sjamie uint32_t ip4s; 297192895Sjamie struct in_addr *u_ip4; 298192895Sjamie#endif 299192895Sjamie#ifdef INET6 300192895Sjamie struct in6_addr *u_ip6; 301192895Sjamie#endif 302192895Sjamie size_t tmplen; 303192895Sjamie int error, enforce_statfs, fi; 304192895Sjamie 305192895Sjamie bzero(&optiov, sizeof(optiov)); 306192895Sjamie opt.uio_iov = optiov; 307192895Sjamie opt.uio_iovcnt = 0; 308192895Sjamie opt.uio_offset = -1; 309192895Sjamie opt.uio_resid = -1; 310192895Sjamie opt.uio_segflg = UIO_SYSSPACE; 311192895Sjamie opt.uio_rw = UIO_READ; 312192895Sjamie opt.uio_td = td; 313192895Sjamie 314192895Sjamie /* Set permissions for top-level jails from sysctls. */ 315192895Sjamie if (!jailed(td->td_ucred)) { 316192895Sjamie for (fi = 0; fi < sizeof(pr_allow_names) / 317192895Sjamie sizeof(pr_allow_names[0]); fi++) { 318192895Sjamie optiov[opt.uio_iovcnt].iov_base = 319192895Sjamie (jail_default_allow & (1 << fi)) 320192895Sjamie ? pr_allow_names[fi] : pr_allow_nonames[fi]; 321192895Sjamie optiov[opt.uio_iovcnt].iov_len = 322192895Sjamie strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 323192895Sjamie opt.uio_iovcnt += 2; 324192895Sjamie } 325192895Sjamie optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 326192895Sjamie optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 327192895Sjamie opt.uio_iovcnt++; 328192895Sjamie enforce_statfs = jail_default_enforce_statfs; 329192895Sjamie optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 330192895Sjamie optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 331192895Sjamie opt.uio_iovcnt++; 332192895Sjamie } 333192895Sjamie 334192895Sjamie tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 335192895Sjamie#ifdef INET 336192895Sjamie ip4s = (j->version == 0) ? 1 : j->ip4s; 337192895Sjamie if (ip4s > jail_max_af_ips) 338192895Sjamie return (EINVAL); 339192895Sjamie tmplen += ip4s * sizeof(struct in_addr); 340191673Sjamie#else 341192895Sjamie if (j->ip4s > 0) 342192895Sjamie return (EINVAL); 343191673Sjamie#endif 344191673Sjamie#ifdef INET6 345192895Sjamie if (j->ip6s > jail_max_af_ips) 346192895Sjamie return (EINVAL); 347192895Sjamie tmplen += j->ip6s * sizeof(struct in6_addr); 348191673Sjamie#else 349192895Sjamie if (j->ip6s > 0) 350192895Sjamie return (EINVAL); 351191673Sjamie#endif 352192895Sjamie u_path = malloc(tmplen, M_TEMP, M_WAITOK); 353192895Sjamie u_hostname = u_path + MAXPATHLEN; 354192895Sjamie u_name = u_hostname + MAXHOSTNAMELEN; 355191673Sjamie#ifdef INET 356192895Sjamie u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 357191673Sjamie#endif 358191673Sjamie#ifdef INET6 359191673Sjamie#ifdef INET 360192895Sjamie u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 361191673Sjamie#else 362192895Sjamie u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 363191673Sjamie#endif 364191673Sjamie#endif 365192895Sjamie optiov[opt.uio_iovcnt].iov_base = "path"; 366192895Sjamie optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 367192895Sjamie opt.uio_iovcnt++; 368192895Sjamie optiov[opt.uio_iovcnt].iov_base = u_path; 369192895Sjamie error = copyinstr(j->path, u_path, MAXPATHLEN, 370192895Sjamie &optiov[opt.uio_iovcnt].iov_len); 371192895Sjamie if (error) { 372192895Sjamie free(u_path, M_TEMP); 373192895Sjamie return (error); 374192895Sjamie } 375192895Sjamie opt.uio_iovcnt++; 376192895Sjamie optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 377192895Sjamie optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 378192895Sjamie opt.uio_iovcnt++; 379192895Sjamie optiov[opt.uio_iovcnt].iov_base = u_hostname; 380192895Sjamie error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 381192895Sjamie &optiov[opt.uio_iovcnt].iov_len); 382192895Sjamie if (error) { 383192895Sjamie free(u_path, M_TEMP); 384192895Sjamie return (error); 385192895Sjamie } 386192895Sjamie opt.uio_iovcnt++; 387192895Sjamie if (j->jailname != NULL) { 388192895Sjamie optiov[opt.uio_iovcnt].iov_base = "name"; 389192895Sjamie optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 390192895Sjamie opt.uio_iovcnt++; 391192895Sjamie optiov[opt.uio_iovcnt].iov_base = u_name; 392192895Sjamie error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 393192895Sjamie &optiov[opt.uio_iovcnt].iov_len); 394191673Sjamie if (error) { 395191673Sjamie free(u_path, M_TEMP); 396191673Sjamie return (error); 397191673Sjamie } 398192895Sjamie opt.uio_iovcnt++; 399192895Sjamie } 400191673Sjamie#ifdef INET 401192895Sjamie optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 402192895Sjamie optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 403192895Sjamie opt.uio_iovcnt++; 404192895Sjamie optiov[opt.uio_iovcnt].iov_base = u_ip4; 405192895Sjamie optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 406192895Sjamie if (j->version == 0) 407192895Sjamie u_ip4->s_addr = j->ip4s; 408192895Sjamie else { 409192895Sjamie error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 410191673Sjamie if (error) { 411191673Sjamie free(u_path, M_TEMP); 412191673Sjamie return (error); 413191673Sjamie } 414192895Sjamie } 415192895Sjamie opt.uio_iovcnt++; 416185435Sbz#endif 417185435Sbz#ifdef INET6 418192895Sjamie optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 419192895Sjamie optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 420192895Sjamie opt.uio_iovcnt++; 421192895Sjamie optiov[opt.uio_iovcnt].iov_base = u_ip6; 422192895Sjamie optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 423192895Sjamie error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 424192895Sjamie if (error) { 425192895Sjamie free(u_path, M_TEMP); 426192895Sjamie return (error); 427192895Sjamie } 428192895Sjamie opt.uio_iovcnt++; 429185435Sbz#endif 430192895Sjamie KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 431192895Sjamie ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 432191673Sjamie error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 433191673Sjamie free(u_path, M_TEMP); 434191673Sjamie return (error); 435185435Sbz} 436185435Sbz 437192895Sjamie 438191673Sjamie/* 439191673Sjamie * struct jail_set_args { 440191673Sjamie * struct iovec *iovp; 441191673Sjamie * unsigned int iovcnt; 442191673Sjamie * int flags; 443191673Sjamie * }; 444191673Sjamie */ 445191673Sjamieint 446191673Sjamiejail_set(struct thread *td, struct jail_set_args *uap) 447185435Sbz{ 448191673Sjamie struct uio *auio; 449191673Sjamie int error; 450191673Sjamie 451191673Sjamie /* Check that we have an even number of iovecs. */ 452191673Sjamie if (uap->iovcnt & 1) 453191673Sjamie return (EINVAL); 454191673Sjamie 455191673Sjamie error = copyinuio(uap->iovp, uap->iovcnt, &auio); 456191673Sjamie if (error) 457191673Sjamie return (error); 458191673Sjamie error = kern_jail_set(td, auio, uap->flags); 459191673Sjamie free(auio, M_IOV); 460191673Sjamie return (error); 461191673Sjamie} 462191673Sjamie 463191673Sjamieint 464191673Sjamiekern_jail_set(struct thread *td, struct uio *optuio, int flags) 465191673Sjamie{ 466191673Sjamie struct nameidata nd; 467185435Sbz#ifdef INET 468190466Sjamie struct in_addr *ip4; 469185435Sbz#endif 470185435Sbz#ifdef INET6 471185435Sbz struct in6_addr *ip6; 472185435Sbz#endif 473191673Sjamie struct vfsopt *opt; 474191673Sjamie struct vfsoptlist *opts; 475195945Sjamie struct prison *pr, *deadpr, *mypr, *ppr, *tpr, *tppr; 476191673Sjamie struct vnode *root; 477193066Sjamie char *domain, *errmsg, *host, *name, *p, *path, *uuid; 478192895Sjamie#if defined(INET) || defined(INET6) 479191673Sjamie void *op; 480192895Sjamie#endif 481193066Sjamie unsigned long hid; 482192895Sjamie size_t namelen, onamelen; 483192895Sjamie int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 484195870Sjamie int gotchildmax, gotenforce, gothid, gotslevel; 485195870Sjamie int fi, jid, jsys, len, level; 486194762Sjamie int childmax, slevel, vfslocked; 487191673Sjamie#if defined(INET) || defined(INET6) 488192895Sjamie int ii, ij; 489191673Sjamie#endif 490191673Sjamie#ifdef INET 491195974Sjamie int ip4s, redo_ip4; 492191673Sjamie#endif 493191673Sjamie#ifdef INET6 494195974Sjamie int ip6s, redo_ip6; 495191673Sjamie#endif 496191673Sjamie unsigned pr_flags, ch_flags; 497192895Sjamie unsigned pr_allow, ch_allow, tallow; 498191673Sjamie char numbuf[12]; 499185435Sbz 500191673Sjamie error = priv_check(td, PRIV_JAIL_SET); 501191673Sjamie if (!error && (flags & JAIL_ATTACH)) 502191673Sjamie error = priv_check(td, PRIV_JAIL_ATTACH); 503191673Sjamie if (error) 504191673Sjamie return (error); 505192895Sjamie mypr = ppr = td->td_ucred->cr_prison; 506194762Sjamie if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) 507192895Sjamie return (EPERM); 508191673Sjamie if (flags & ~JAIL_SET_MASK) 509191673Sjamie return (EINVAL); 510191673Sjamie 511185435Sbz /* 512191673Sjamie * Check all the parameters before committing to anything. Not all 513191673Sjamie * errors can be caught early, but we may as well try. Also, this 514191673Sjamie * takes care of some expensive stuff (path lookup) before getting 515191673Sjamie * the allprison lock. 516185435Sbz * 517191673Sjamie * XXX Jails are not filesystems, and jail parameters are not mount 518191673Sjamie * options. But it makes more sense to re-use the vfsopt code 519191673Sjamie * than duplicate it under a different name. 520185435Sbz */ 521191673Sjamie error = vfs_buildopts(optuio, &opts); 522191673Sjamie if (error) 523191673Sjamie return (error); 524185435Sbz#ifdef INET 525185435Sbz ip4 = NULL; 526185435Sbz#endif 527185435Sbz#ifdef INET6 528185435Sbz ip6 = NULL; 529185435Sbz#endif 530191673Sjamie 531191673Sjamie error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 532191673Sjamie if (error == ENOENT) 533191673Sjamie jid = 0; 534191673Sjamie else if (error != 0) 535191673Sjamie goto done_free; 536191673Sjamie 537191673Sjamie error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 538191673Sjamie if (error == ENOENT) 539191673Sjamie gotslevel = 0; 540191673Sjamie else if (error != 0) 541191673Sjamie goto done_free; 542191673Sjamie else 543191673Sjamie gotslevel = 1; 544191673Sjamie 545194762Sjamie error = 546194762Sjamie vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); 547194762Sjamie if (error == ENOENT) 548194762Sjamie gotchildmax = 0; 549194762Sjamie else if (error != 0) 550194762Sjamie goto done_free; 551194762Sjamie else 552194762Sjamie gotchildmax = 1; 553194762Sjamie 554192895Sjamie error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 555192895Sjamie gotenforce = (error == 0); 556192895Sjamie if (gotenforce) { 557192895Sjamie if (enforce < 0 || enforce > 2) 558192895Sjamie return (EINVAL); 559192895Sjamie } else if (error != ENOENT) 560192895Sjamie goto done_free; 561192895Sjamie 562191673Sjamie pr_flags = ch_flags = 0; 563192895Sjamie for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 564192895Sjamie fi++) { 565192895Sjamie if (pr_flag_names[fi] == NULL) 566192895Sjamie continue; 567192895Sjamie vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 568192895Sjamie vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 569192895Sjamie } 570191673Sjamie ch_flags |= pr_flags; 571195870Sjamie for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 572195870Sjamie fi++) { 573195870Sjamie error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys, 574195870Sjamie sizeof(jsys)); 575195870Sjamie if (error == ENOENT) 576195870Sjamie continue; 577195870Sjamie if (error != 0) 578195870Sjamie goto done_free; 579195870Sjamie switch (jsys) { 580195870Sjamie case JAIL_SYS_DISABLE: 581195870Sjamie if (!pr_flag_jailsys[fi].disable) { 582195870Sjamie error = EINVAL; 583195870Sjamie goto done_free; 584195870Sjamie } 585195870Sjamie pr_flags |= pr_flag_jailsys[fi].disable; 586195870Sjamie break; 587195870Sjamie case JAIL_SYS_NEW: 588195870Sjamie pr_flags |= pr_flag_jailsys[fi].new; 589195870Sjamie break; 590195870Sjamie case JAIL_SYS_INHERIT: 591195870Sjamie break; 592195870Sjamie default: 593195870Sjamie error = EINVAL; 594195870Sjamie goto done_free; 595195870Sjamie } 596195870Sjamie ch_flags |= 597195870Sjamie pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable; 598195870Sjamie } 599191673Sjamie if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 600191673Sjamie && !(pr_flags & PR_PERSIST)) { 601191673Sjamie error = EINVAL; 602191673Sjamie vfs_opterror(opts, "new jail must persist or attach"); 603191673Sjamie goto done_errmsg; 604191673Sjamie } 605194251Sjamie#ifdef VIMAGE 606194251Sjamie if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { 607194251Sjamie error = EINVAL; 608194251Sjamie vfs_opterror(opts, "vnet cannot be changed after creation"); 609194251Sjamie goto done_errmsg; 610194251Sjamie } 611194251Sjamie#endif 612195974Sjamie#ifdef INET 613195974Sjamie if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { 614195974Sjamie error = EINVAL; 615195974Sjamie vfs_opterror(opts, "ip4 cannot be changed after creation"); 616195974Sjamie goto done_errmsg; 617195974Sjamie } 618195974Sjamie#endif 619195974Sjamie#ifdef INET6 620195974Sjamie if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { 621195974Sjamie error = EINVAL; 622195974Sjamie vfs_opterror(opts, "ip6 cannot be changed after creation"); 623195974Sjamie goto done_errmsg; 624195974Sjamie } 625195974Sjamie#endif 626191673Sjamie 627192895Sjamie pr_allow = ch_allow = 0; 628192895Sjamie for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 629192895Sjamie fi++) { 630192895Sjamie vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 631192895Sjamie vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 632192895Sjamie } 633192895Sjamie ch_allow |= pr_allow; 634192895Sjamie 635191673Sjamie error = vfs_getopt(opts, "name", (void **)&name, &len); 636191673Sjamie if (error == ENOENT) 637191673Sjamie name = NULL; 638191673Sjamie else if (error != 0) 639191673Sjamie goto done_free; 640191673Sjamie else { 641191673Sjamie if (len == 0 || name[len - 1] != '\0') { 642191673Sjamie error = EINVAL; 643191673Sjamie goto done_free; 644191673Sjamie } 645191673Sjamie if (len > MAXHOSTNAMELEN) { 646191673Sjamie error = ENAMETOOLONG; 647191673Sjamie goto done_free; 648191673Sjamie } 649191673Sjamie } 650191673Sjamie 651191673Sjamie error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 652191673Sjamie if (error == ENOENT) 653191673Sjamie host = NULL; 654191673Sjamie else if (error != 0) 655191673Sjamie goto done_free; 656191673Sjamie else { 657193066Sjamie ch_flags |= PR_HOST; 658193066Sjamie pr_flags |= PR_HOST; 659191673Sjamie if (len == 0 || host[len - 1] != '\0') { 660191673Sjamie error = EINVAL; 661191673Sjamie goto done_free; 662191673Sjamie } 663191673Sjamie if (len > MAXHOSTNAMELEN) { 664191673Sjamie error = ENAMETOOLONG; 665191673Sjamie goto done_free; 666191673Sjamie } 667191673Sjamie } 668191673Sjamie 669193066Sjamie error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 670193066Sjamie if (error == ENOENT) 671193066Sjamie domain = NULL; 672193066Sjamie else if (error != 0) 673193066Sjamie goto done_free; 674193066Sjamie else { 675193066Sjamie ch_flags |= PR_HOST; 676193066Sjamie pr_flags |= PR_HOST; 677193066Sjamie if (len == 0 || domain[len - 1] != '\0') { 678193066Sjamie error = EINVAL; 679193066Sjamie goto done_free; 680193066Sjamie } 681193066Sjamie if (len > MAXHOSTNAMELEN) { 682193066Sjamie error = ENAMETOOLONG; 683193066Sjamie goto done_free; 684193066Sjamie } 685193066Sjamie } 686193066Sjamie 687193066Sjamie error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 688193066Sjamie if (error == ENOENT) 689193066Sjamie uuid = NULL; 690193066Sjamie else if (error != 0) 691193066Sjamie goto done_free; 692193066Sjamie else { 693193066Sjamie ch_flags |= PR_HOST; 694193066Sjamie pr_flags |= PR_HOST; 695193066Sjamie if (len == 0 || uuid[len - 1] != '\0') { 696193066Sjamie error = EINVAL; 697193066Sjamie goto done_free; 698193066Sjamie } 699193066Sjamie if (len > HOSTUUIDLEN) { 700193066Sjamie error = ENAMETOOLONG; 701193066Sjamie goto done_free; 702193066Sjamie } 703193066Sjamie } 704193066Sjamie 705193066Sjamie#ifdef COMPAT_IA32 706193066Sjamie if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 707193066Sjamie uint32_t hid32; 708193066Sjamie 709193066Sjamie error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 710193066Sjamie hid = hid32; 711193066Sjamie } else 712193066Sjamie#endif 713193066Sjamie error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 714193066Sjamie if (error == ENOENT) 715193066Sjamie gothid = 0; 716193066Sjamie else if (error != 0) 717193066Sjamie goto done_free; 718193066Sjamie else { 719193066Sjamie gothid = 1; 720193066Sjamie ch_flags |= PR_HOST; 721193066Sjamie pr_flags |= PR_HOST; 722193066Sjamie } 723193066Sjamie 724185435Sbz#ifdef INET 725191673Sjamie error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 726191673Sjamie if (error == ENOENT) 727195870Sjamie ip4s = (pr_flags & PR_IP4_DISABLE) ? 0 : -1; 728191673Sjamie else if (error != 0) 729191673Sjamie goto done_free; 730191673Sjamie else if (ip4s & (sizeof(*ip4) - 1)) { 731191673Sjamie error = EINVAL; 732191673Sjamie goto done_free; 733192895Sjamie } else { 734195870Sjamie ch_flags |= PR_IP4_USER | PR_IP4_DISABLE; 735195870Sjamie if (ip4s == 0) 736195870Sjamie pr_flags |= PR_IP4_USER | PR_IP4_DISABLE; 737195870Sjamie else { 738195870Sjamie pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER; 739192895Sjamie ip4s /= sizeof(*ip4); 740192895Sjamie if (ip4s > jail_max_af_ips) { 741185435Sbz error = EINVAL; 742192895Sjamie vfs_opterror(opts, "too many IPv4 addresses"); 743192895Sjamie goto done_errmsg; 744185435Sbz } 745195974Sjamie ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 746192895Sjamie bcopy(op, ip4, ip4s * sizeof(*ip4)); 747192895Sjamie /* 748192895Sjamie * IP addresses are all sorted but ip[0] to preserve 749192895Sjamie * the primary IP address as given from userland. 750192895Sjamie * This special IP is used for unbound outgoing 751192895Sjamie * connections as well for "loopback" traffic. 752192895Sjamie */ 753192895Sjamie if (ip4s > 1) 754192895Sjamie qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 755192895Sjamie /* 756192895Sjamie * Check for duplicate addresses and do some simple 757192895Sjamie * zero and broadcast checks. If users give other bogus 758192895Sjamie * addresses it is their problem. 759192895Sjamie * 760192895Sjamie * We do not have to care about byte order for these 761192895Sjamie * checks so we will do them in NBO. 762192895Sjamie */ 763192895Sjamie for (ii = 0; ii < ip4s; ii++) { 764192895Sjamie if (ip4[ii].s_addr == INADDR_ANY || 765192895Sjamie ip4[ii].s_addr == INADDR_BROADCAST) { 766192895Sjamie error = EINVAL; 767192895Sjamie goto done_free; 768192895Sjamie } 769192895Sjamie if ((ii+1) < ip4s && 770192895Sjamie (ip4[0].s_addr == ip4[ii+1].s_addr || 771192895Sjamie ip4[ii].s_addr == ip4[ii+1].s_addr)) { 772192895Sjamie error = EINVAL; 773192895Sjamie goto done_free; 774192895Sjamie } 775192895Sjamie } 776185435Sbz } 777191673Sjamie } 778191673Sjamie#endif 779185435Sbz 780185435Sbz#ifdef INET6 781191673Sjamie error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 782191673Sjamie if (error == ENOENT) 783195870Sjamie ip6s = (pr_flags & PR_IP6_DISABLE) ? 0 : -1; 784191673Sjamie else if (error != 0) 785191673Sjamie goto done_free; 786191673Sjamie else if (ip6s & (sizeof(*ip6) - 1)) { 787191673Sjamie error = EINVAL; 788191673Sjamie goto done_free; 789192895Sjamie } else { 790195870Sjamie ch_flags |= PR_IP6_USER | PR_IP6_DISABLE; 791195870Sjamie if (ip6s == 0) 792195870Sjamie pr_flags |= PR_IP6_USER | PR_IP6_DISABLE; 793195870Sjamie else { 794195870Sjamie pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER; 795192895Sjamie ip6s /= sizeof(*ip6); 796192895Sjamie if (ip6s > jail_max_af_ips) { 797185435Sbz error = EINVAL; 798192895Sjamie vfs_opterror(opts, "too many IPv6 addresses"); 799192895Sjamie goto done_errmsg; 800185435Sbz } 801195974Sjamie ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 802192895Sjamie bcopy(op, ip6, ip6s * sizeof(*ip6)); 803192895Sjamie if (ip6s > 1) 804192895Sjamie qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 805192895Sjamie for (ii = 0; ii < ip6s; ii++) { 806192895Sjamie if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 807192895Sjamie error = EINVAL; 808192895Sjamie goto done_free; 809192895Sjamie } 810192895Sjamie if ((ii+1) < ip6s && 811192895Sjamie (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 812192895Sjamie IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 813192895Sjamie { 814192895Sjamie error = EINVAL; 815192895Sjamie goto done_free; 816192895Sjamie } 817192895Sjamie } 818185435Sbz } 819191673Sjamie } 820185435Sbz#endif 821185435Sbz 822195945Sjamie#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 823195945Sjamie if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 824195945Sjamie error = EINVAL; 825195945Sjamie vfs_opterror(opts, 826195945Sjamie "vnet jails cannot have IP address restrictions"); 827195945Sjamie goto done_errmsg; 828195945Sjamie } 829195945Sjamie#endif 830195945Sjamie 831191673Sjamie root = NULL; 832191673Sjamie error = vfs_getopt(opts, "path", (void **)&path, &len); 833191673Sjamie if (error == ENOENT) 834191673Sjamie path = NULL; 835191673Sjamie else if (error != 0) 836191673Sjamie goto done_free; 837191673Sjamie else { 838191673Sjamie if (flags & JAIL_UPDATE) { 839191673Sjamie error = EINVAL; 840191673Sjamie vfs_opterror(opts, 841191673Sjamie "path cannot be changed after creation"); 842191673Sjamie goto done_errmsg; 843191673Sjamie } 844191673Sjamie if (len == 0 || path[len - 1] != '\0') { 845191673Sjamie error = EINVAL; 846191673Sjamie goto done_free; 847191673Sjamie } 848191673Sjamie if (len < 2 || (len == 2 && path[0] == '/')) 849191673Sjamie path = NULL; 850191673Sjamie else { 851192895Sjamie /* Leave room for a real-root full pathname. */ 852192895Sjamie if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 853192895Sjamie ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 854192895Sjamie error = ENAMETOOLONG; 855192895Sjamie goto done_free; 856192895Sjamie } 857191673Sjamie NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE, 858191673Sjamie path, td); 859191673Sjamie error = namei(&nd); 860191673Sjamie if (error) 861191673Sjamie goto done_free; 862191673Sjamie vfslocked = NDHASGIANT(&nd); 863191673Sjamie root = nd.ni_vp; 864191673Sjamie NDFREE(&nd, NDF_ONLY_PNBUF); 865191673Sjamie if (root->v_type != VDIR) { 866191673Sjamie error = ENOTDIR; 867191673Sjamie vrele(root); 868191673Sjamie VFS_UNLOCK_GIANT(vfslocked); 869191673Sjamie goto done_free; 870191673Sjamie } 871191673Sjamie VFS_UNLOCK_GIANT(vfslocked); 872191673Sjamie } 873191673Sjamie } 874185435Sbz 875191673Sjamie /* 876191673Sjamie * Grab the allprison lock before letting modules check their 877191673Sjamie * parameters. Once we have it, do not let go so we'll have a 878191673Sjamie * consistent view of the OSD list. 879191673Sjamie */ 880191673Sjamie sx_xlock(&allprison_lock); 881191673Sjamie error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 882191673Sjamie if (error) 883191673Sjamie goto done_unlock_list; 884185435Sbz 885191673Sjamie /* By now, all parameters should have been noted. */ 886191673Sjamie TAILQ_FOREACH(opt, opts, link) { 887191673Sjamie if (!opt->seen && strcmp(opt->name, "errmsg")) { 888191673Sjamie error = EINVAL; 889191673Sjamie vfs_opterror(opts, "unknown parameter: %s", opt->name); 890191673Sjamie goto done_unlock_list; 891191673Sjamie } 892191673Sjamie } 893191673Sjamie 894185435Sbz /* 895191673Sjamie * See if we are creating a new record or updating an existing one. 896191673Sjamie * This abuses the file error codes ENOENT and EEXIST. 897185435Sbz */ 898191673Sjamie cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 899191673Sjamie if (!cuflags) { 900191673Sjamie error = EINVAL; 901191673Sjamie vfs_opterror(opts, "no valid operation (create or update)"); 902191673Sjamie goto done_unlock_list; 903191673Sjamie } 904191673Sjamie pr = NULL; 905191673Sjamie if (jid != 0) { 906192895Sjamie /* 907192895Sjamie * See if a requested jid already exists. There is an 908192895Sjamie * information leak here if the jid exists but is not within 909192895Sjamie * the caller's jail hierarchy. Jail creators will get EEXIST 910192895Sjamie * even though they cannot see the jail, and CREATE | UPDATE 911192895Sjamie * will return ENOENT which is not normally a valid error. 912192895Sjamie */ 913191673Sjamie if (jid < 0) { 914191673Sjamie error = EINVAL; 915191673Sjamie vfs_opterror(opts, "negative jid"); 916191673Sjamie goto done_unlock_list; 917191673Sjamie } 918191673Sjamie pr = prison_find(jid); 919191673Sjamie if (pr != NULL) { 920192895Sjamie ppr = pr->pr_parent; 921191673Sjamie /* Create: jid must not exist. */ 922191673Sjamie if (cuflags == JAIL_CREATE) { 923191673Sjamie mtx_unlock(&pr->pr_mtx); 924191673Sjamie error = EEXIST; 925191673Sjamie vfs_opterror(opts, "jail %d already exists", 926191673Sjamie jid); 927191673Sjamie goto done_unlock_list; 928191673Sjamie } 929192895Sjamie if (!prison_ischild(mypr, pr)) { 930192895Sjamie mtx_unlock(&pr->pr_mtx); 931192895Sjamie pr = NULL; 932192895Sjamie } else if (pr->pr_uref == 0) { 933191673Sjamie if (!(flags & JAIL_DYING)) { 934191673Sjamie mtx_unlock(&pr->pr_mtx); 935191673Sjamie error = ENOENT; 936191673Sjamie vfs_opterror(opts, "jail %d is dying", 937191673Sjamie jid); 938191673Sjamie goto done_unlock_list; 939191673Sjamie } else if ((flags & JAIL_ATTACH) || 940191673Sjamie (pr_flags & PR_PERSIST)) { 941191673Sjamie /* 942191673Sjamie * A dying jail might be resurrected 943191673Sjamie * (via attach or persist), but first 944191673Sjamie * it must determine if another jail 945191673Sjamie * has claimed its name. Accomplish 946191673Sjamie * this by implicitly re-setting the 947191673Sjamie * name. 948191673Sjamie */ 949191673Sjamie if (name == NULL) 950192895Sjamie name = prison_name(mypr, pr); 951191673Sjamie } 952191673Sjamie } 953191673Sjamie } 954191673Sjamie if (pr == NULL) { 955191673Sjamie /* Update: jid must exist. */ 956191673Sjamie if (cuflags == JAIL_UPDATE) { 957191673Sjamie error = ENOENT; 958191673Sjamie vfs_opterror(opts, "jail %d not found", jid); 959191673Sjamie goto done_unlock_list; 960191673Sjamie } 961191673Sjamie } 962191673Sjamie } 963191673Sjamie /* 964191673Sjamie * If the caller provided a name, look for a jail by that name. 965191673Sjamie * This has different semantics for creates and updates keyed by jid 966191673Sjamie * (where the name must not already exist in a different jail), 967191673Sjamie * and updates keyed by the name itself (where the name must exist 968191673Sjamie * because that is the jail being updated). 969191673Sjamie */ 970191673Sjamie if (name != NULL) { 971192895Sjamie p = strrchr(name, '.'); 972192895Sjamie if (p != NULL) { 973192895Sjamie /* 974192895Sjamie * This is a hierarchical name. Split it into the 975192895Sjamie * parent and child names, and make sure the parent 976192895Sjamie * exists or matches an already found jail. 977192895Sjamie */ 978192895Sjamie *p = '\0'; 979192895Sjamie if (pr != NULL) { 980192895Sjamie if (strncmp(name, ppr->pr_name, p - name) || 981192895Sjamie ppr->pr_name[p - name] != '\0') { 982192895Sjamie mtx_unlock(&pr->pr_mtx); 983192895Sjamie error = EINVAL; 984192895Sjamie vfs_opterror(opts, 985192895Sjamie "cannot change jail's parent"); 986192895Sjamie goto done_unlock_list; 987192895Sjamie } 988192895Sjamie } else { 989192895Sjamie ppr = prison_find_name(mypr, name); 990192895Sjamie if (ppr == NULL) { 991192895Sjamie error = ENOENT; 992192895Sjamie vfs_opterror(opts, 993192895Sjamie "jail \"%s\" not found", name); 994192895Sjamie goto done_unlock_list; 995192895Sjamie } 996192895Sjamie mtx_unlock(&ppr->pr_mtx); 997192895Sjamie } 998192895Sjamie name = p + 1; 999192895Sjamie } 1000191673Sjamie if (name[0] != '\0') { 1001192895Sjamie namelen = 1002192895Sjamie (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 1003192895Sjamie name_again: 1004191673Sjamie deadpr = NULL; 1005192895Sjamie FOREACH_PRISON_CHILD(ppr, tpr) { 1006191673Sjamie if (tpr != pr && tpr->pr_ref > 0 && 1007192895Sjamie !strcmp(tpr->pr_name + namelen, name)) { 1008191673Sjamie if (pr == NULL && 1009191673Sjamie cuflags != JAIL_CREATE) { 1010191673Sjamie mtx_lock(&tpr->pr_mtx); 1011191673Sjamie if (tpr->pr_ref > 0) { 1012191673Sjamie /* 1013191673Sjamie * Use this jail 1014191673Sjamie * for updates. 1015191673Sjamie */ 1016191673Sjamie if (tpr->pr_uref > 0) { 1017191673Sjamie pr = tpr; 1018191673Sjamie break; 1019191673Sjamie } 1020191673Sjamie deadpr = tpr; 1021191673Sjamie } 1022191673Sjamie mtx_unlock(&tpr->pr_mtx); 1023191673Sjamie } else if (tpr->pr_uref > 0) { 1024191673Sjamie /* 1025191673Sjamie * Create, or update(jid): 1026191673Sjamie * name must not exist in an 1027192895Sjamie * active sibling jail. 1028191673Sjamie */ 1029191673Sjamie error = EEXIST; 1030191673Sjamie if (pr != NULL) 1031191673Sjamie mtx_unlock(&pr->pr_mtx); 1032191673Sjamie vfs_opterror(opts, 1033191673Sjamie "jail \"%s\" already exists", 1034191673Sjamie name); 1035191673Sjamie goto done_unlock_list; 1036191673Sjamie } 1037191673Sjamie } 1038191673Sjamie } 1039191673Sjamie /* If no active jail is found, use a dying one. */ 1040191673Sjamie if (deadpr != NULL && pr == NULL) { 1041191673Sjamie if (flags & JAIL_DYING) { 1042191673Sjamie mtx_lock(&deadpr->pr_mtx); 1043191673Sjamie if (deadpr->pr_ref == 0) { 1044191673Sjamie mtx_unlock(&deadpr->pr_mtx); 1045191673Sjamie goto name_again; 1046191673Sjamie } 1047191673Sjamie pr = deadpr; 1048191673Sjamie } else if (cuflags == JAIL_UPDATE) { 1049191673Sjamie error = ENOENT; 1050191673Sjamie vfs_opterror(opts, 1051191673Sjamie "jail \"%s\" is dying", name); 1052191673Sjamie goto done_unlock_list; 1053191673Sjamie } 1054191673Sjamie } 1055191673Sjamie /* Update: name must exist if no jid. */ 1056191673Sjamie else if (cuflags == JAIL_UPDATE && pr == NULL) { 1057191673Sjamie error = ENOENT; 1058191673Sjamie vfs_opterror(opts, "jail \"%s\" not found", 1059191673Sjamie name); 1060191673Sjamie goto done_unlock_list; 1061191673Sjamie } 1062191673Sjamie } 1063191673Sjamie } 1064191673Sjamie /* Update: must provide a jid or name. */ 1065191673Sjamie else if (cuflags == JAIL_UPDATE && pr == NULL) { 1066191673Sjamie error = ENOENT; 1067191673Sjamie vfs_opterror(opts, "update specified no jail"); 1068191673Sjamie goto done_unlock_list; 1069191673Sjamie } 1070185435Sbz 1071191673Sjamie /* If there's no prison to update, create a new one and link it in. */ 1072191673Sjamie if (pr == NULL) { 1073194762Sjamie for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) 1074194762Sjamie if (tpr->pr_childcount >= tpr->pr_childmax) { 1075194762Sjamie error = EPERM; 1076194762Sjamie vfs_opterror(opts, "prison limit exceeded"); 1077194762Sjamie goto done_unlock_list; 1078194762Sjamie } 1079191673Sjamie created = 1; 1080192895Sjamie mtx_lock(&ppr->pr_mtx); 1081192895Sjamie if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1082192895Sjamie mtx_unlock(&ppr->pr_mtx); 1083192895Sjamie error = ENOENT; 1084192895Sjamie vfs_opterror(opts, "parent jail went away!"); 1085192895Sjamie goto done_unlock_list; 1086192895Sjamie } 1087192895Sjamie ppr->pr_ref++; 1088192895Sjamie ppr->pr_uref++; 1089192895Sjamie mtx_unlock(&ppr->pr_mtx); 1090191673Sjamie pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1091191673Sjamie if (jid == 0) { 1092191673Sjamie /* Find the next free jid. */ 1093191673Sjamie jid = lastprid + 1; 1094191673Sjamie findnext: 1095191673Sjamie if (jid == JAIL_MAX) 1096191673Sjamie jid = 1; 1097191673Sjamie TAILQ_FOREACH(tpr, &allprison, pr_list) { 1098191673Sjamie if (tpr->pr_id < jid) 1099191673Sjamie continue; 1100191673Sjamie if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1101191673Sjamie TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1102191673Sjamie break; 1103191673Sjamie } 1104191673Sjamie if (jid == lastprid) { 1105191673Sjamie error = EAGAIN; 1106191673Sjamie vfs_opterror(opts, 1107191673Sjamie "no available jail IDs"); 1108191673Sjamie free(pr, M_PRISON); 1109192895Sjamie prison_deref(ppr, PD_DEREF | 1110192895Sjamie PD_DEUREF | PD_LIST_XLOCKED); 1111192895Sjamie goto done_releroot; 1112191673Sjamie } 1113191673Sjamie jid++; 1114191673Sjamie goto findnext; 1115191673Sjamie } 1116191673Sjamie lastprid = jid; 1117191673Sjamie } else { 1118191673Sjamie /* 1119191673Sjamie * The jail already has a jid (that did not yet exist), 1120191673Sjamie * so just find where to insert it. 1121191673Sjamie */ 1122191673Sjamie TAILQ_FOREACH(tpr, &allprison, pr_list) 1123191673Sjamie if (tpr->pr_id >= jid) { 1124191673Sjamie TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1125191673Sjamie break; 1126191673Sjamie } 1127191673Sjamie } 1128191673Sjamie if (tpr == NULL) 1129191673Sjamie TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1130192895Sjamie LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1131192895Sjamie for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1132194762Sjamie tpr->pr_childcount++; 1133185435Sbz 1134192895Sjamie pr->pr_parent = ppr; 1135191673Sjamie pr->pr_id = jid; 1136192895Sjamie 1137192895Sjamie /* Set some default values, and inherit some from the parent. */ 1138191673Sjamie if (name == NULL) 1139191673Sjamie name = ""; 1140191673Sjamie if (path == NULL) { 1141191673Sjamie path = "/"; 1142192895Sjamie root = mypr->pr_root; 1143191673Sjamie vref(root); 1144191673Sjamie } 1145195944Sjamie strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); 1146195944Sjamie pr->pr_flags |= PR_HOST; 1147195945Sjamie#if defined(INET) || defined(INET6) 1148195945Sjamie#ifdef VIMAGE 1149195945Sjamie if (!(pr_flags & PR_VNET)) 1150195945Sjamie#endif 1151195945Sjamie { 1152192895Sjamie#ifdef INET 1153195974Sjamie if (!(ch_flags & PR_IP4_USER)) 1154195974Sjamie pr->pr_flags |= 1155195974Sjamie PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE; 1156195974Sjamie else if (!(pr_flags & PR_IP4_USER)) { 1157195974Sjamie pr->pr_flags |= ppr->pr_flags & PR_IP4; 1158195974Sjamie if (ppr->pr_ip4 != NULL) { 1159195974Sjamie pr->pr_ip4s = ppr->pr_ip4s; 1160195974Sjamie pr->pr_ip4 = malloc(pr->pr_ip4s * 1161195974Sjamie sizeof(struct in_addr), M_PRISON, 1162195974Sjamie M_WAITOK); 1163195974Sjamie bcopy(ppr->pr_ip4, pr->pr_ip4, 1164195974Sjamie pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1165195974Sjamie } 1166195974Sjamie } 1167192895Sjamie#endif 1168192895Sjamie#ifdef INET6 1169195974Sjamie if (!(ch_flags & PR_IP6_USER)) 1170195974Sjamie pr->pr_flags |= 1171195974Sjamie PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE; 1172195974Sjamie else if (!(pr_flags & PR_IP6_USER)) { 1173195974Sjamie pr->pr_flags |= ppr->pr_flags & PR_IP6; 1174195974Sjamie if (ppr->pr_ip6 != NULL) { 1175195974Sjamie pr->pr_ip6s = ppr->pr_ip6s; 1176195974Sjamie pr->pr_ip6 = malloc(pr->pr_ip6s * 1177195974Sjamie sizeof(struct in6_addr), M_PRISON, 1178195974Sjamie M_WAITOK); 1179195974Sjamie bcopy(ppr->pr_ip6, pr->pr_ip6, 1180195974Sjamie pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1181195974Sjamie } 1182195974Sjamie } 1183192895Sjamie#endif 1184195945Sjamie } 1185195945Sjamie#endif 1186192895Sjamie pr->pr_securelevel = ppr->pr_securelevel; 1187192895Sjamie pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1188196002Sjamie pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 1189191673Sjamie 1190192895Sjamie LIST_INIT(&pr->pr_children); 1191192895Sjamie mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1192191673Sjamie 1193194251Sjamie#ifdef VIMAGE 1194194251Sjamie /* Allocate a new vnet if specified. */ 1195194251Sjamie pr->pr_vnet = (pr_flags & PR_VNET) 1196194251Sjamie ? vnet_alloc() : ppr->pr_vnet; 1197194251Sjamie#endif 1198185435Sbz /* 1199191673Sjamie * Allocate a dedicated cpuset for each jail. 1200191673Sjamie * Unlike other initial settings, this may return an erorr. 1201185435Sbz */ 1202192895Sjamie error = cpuset_create_root(ppr, &pr->pr_cpuset); 1203191673Sjamie if (error) { 1204191673Sjamie prison_deref(pr, PD_LIST_XLOCKED); 1205191673Sjamie goto done_releroot; 1206191673Sjamie } 1207185435Sbz 1208191673Sjamie mtx_lock(&pr->pr_mtx); 1209185435Sbz /* 1210191673Sjamie * New prisons do not yet have a reference, because we do not 1211191673Sjamie * want other to see the incomplete prison once the 1212191673Sjamie * allprison_lock is downgraded. 1213185435Sbz */ 1214191673Sjamie } else { 1215191673Sjamie created = 0; 1216195974Sjamie /* 1217195974Sjamie * Grab a reference for existing prisons, to ensure they 1218195974Sjamie * continue to exist for the duration of the call. 1219195974Sjamie */ 1220195974Sjamie pr->pr_ref++; 1221195945Sjamie#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 1222195945Sjamie if ((pr->pr_flags & PR_VNET) && 1223195945Sjamie (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 1224195945Sjamie error = EINVAL; 1225195945Sjamie vfs_opterror(opts, 1226195945Sjamie "vnet jails cannot have IP address restrictions"); 1227195945Sjamie goto done_deref_locked; 1228195945Sjamie } 1229195945Sjamie#endif 1230195974Sjamie#ifdef INET 1231195974Sjamie if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1232195974Sjamie error = EINVAL; 1233195974Sjamie vfs_opterror(opts, 1234195974Sjamie "ip4 cannot be changed after creation"); 1235195974Sjamie goto done_deref_locked; 1236195974Sjamie } 1237195974Sjamie#endif 1238195974Sjamie#ifdef INET6 1239195974Sjamie if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1240195974Sjamie error = EINVAL; 1241195974Sjamie vfs_opterror(opts, 1242195974Sjamie "ip6 cannot be changed after creation"); 1243195974Sjamie goto done_deref_locked; 1244195974Sjamie } 1245195974Sjamie#endif 1246191673Sjamie } 1247185435Sbz 1248191673Sjamie /* Do final error checking before setting anything. */ 1249192895Sjamie if (gotslevel) { 1250192895Sjamie if (slevel < ppr->pr_securelevel) { 1251192895Sjamie error = EPERM; 1252192895Sjamie goto done_deref_locked; 1253192895Sjamie } 1254192895Sjamie } 1255194762Sjamie if (gotchildmax) { 1256194762Sjamie if (childmax >= ppr->pr_childmax) { 1257194762Sjamie error = EPERM; 1258194762Sjamie goto done_deref_locked; 1259194762Sjamie } 1260194762Sjamie } 1261192895Sjamie if (gotenforce) { 1262192895Sjamie if (enforce < ppr->pr_enforce_statfs) { 1263192895Sjamie error = EPERM; 1264192895Sjamie goto done_deref_locked; 1265192895Sjamie } 1266192895Sjamie } 1267185435Sbz#ifdef INET 1268195974Sjamie if (ip4s > 0) { 1269192895Sjamie if (ppr->pr_flags & PR_IP4) { 1270195974Sjamie /* 1271195974Sjamie * Make sure the new set of IP addresses is a 1272195974Sjamie * subset of the parent's list. Don't worry 1273195974Sjamie * about the parent being unlocked, as any 1274195974Sjamie * setting is done with allprison_lock held. 1275195974Sjamie */ 1276195974Sjamie for (ij = 0; ij < ppr->pr_ip4s; ij++) 1277195974Sjamie if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 1278195974Sjamie break; 1279195974Sjamie if (ij == ppr->pr_ip4s) { 1280195974Sjamie error = EPERM; 1281195974Sjamie goto done_deref_locked; 1282195974Sjamie } 1283195974Sjamie if (ip4s > 1) { 1284195974Sjamie for (ii = ij = 1; ii < ip4s; ii++) { 1285195974Sjamie if (ip4[ii].s_addr == 1286195974Sjamie ppr->pr_ip4[0].s_addr) 1287195974Sjamie continue; 1288195974Sjamie for (; ij < ppr->pr_ip4s; ij++) 1289195974Sjamie if (ip4[ii].s_addr == 1290195974Sjamie ppr->pr_ip4[ij].s_addr) 1291195974Sjamie break; 1292195974Sjamie if (ij == ppr->pr_ip4s) 1293195974Sjamie break; 1294192895Sjamie } 1295192895Sjamie if (ij == ppr->pr_ip4s) { 1296192895Sjamie error = EPERM; 1297192895Sjamie goto done_deref_locked; 1298192895Sjamie } 1299192895Sjamie } 1300192895Sjamie } 1301195974Sjamie /* 1302195974Sjamie * Check for conflicting IP addresses. We permit them 1303195974Sjamie * if there is no more than one IP on each jail. If 1304195974Sjamie * there is a duplicate on a jail with more than one 1305195974Sjamie * IP stop checking and return error. 1306195974Sjamie */ 1307195974Sjamie tppr = ppr; 1308195945Sjamie#ifdef VIMAGE 1309195974Sjamie for (; tppr != &prison0; tppr = tppr->pr_parent) 1310195974Sjamie if (tppr->pr_flags & PR_VNET) 1311195974Sjamie break; 1312195945Sjamie#endif 1313195974Sjamie FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1314195974Sjamie if (tpr == pr || 1315195945Sjamie#ifdef VIMAGE 1316195974Sjamie (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1317195945Sjamie#endif 1318195974Sjamie tpr->pr_uref == 0) { 1319192895Sjamie descend = 0; 1320195974Sjamie continue; 1321195974Sjamie } 1322195974Sjamie if (!(tpr->pr_flags & PR_IP4_USER)) 1323195974Sjamie continue; 1324195974Sjamie descend = 0; 1325195974Sjamie if (tpr->pr_ip4 == NULL || 1326195974Sjamie (ip4s == 1 && tpr->pr_ip4s == 1)) 1327195974Sjamie continue; 1328195974Sjamie for (ii = 0; ii < ip4s; ii++) { 1329195974Sjamie if (_prison_check_ip4(tpr, &ip4[ii]) == 0) { 1330195974Sjamie error = EADDRINUSE; 1331195974Sjamie vfs_opterror(opts, 1332195974Sjamie "IPv4 addresses clash"); 1333195974Sjamie goto done_deref_locked; 1334192895Sjamie } 1335192895Sjamie } 1336192895Sjamie } 1337192895Sjamie } 1338185435Sbz#endif 1339191673Sjamie#ifdef INET6 1340195974Sjamie if (ip6s > 0) { 1341192895Sjamie if (ppr->pr_flags & PR_IP6) { 1342195974Sjamie /* 1343195974Sjamie * Make sure the new set of IP addresses is a 1344195974Sjamie * subset of the parent's list. 1345195974Sjamie */ 1346195974Sjamie for (ij = 0; ij < ppr->pr_ip6s; ij++) 1347195974Sjamie if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1348195974Sjamie &ppr->pr_ip6[ij])) 1349195974Sjamie break; 1350195974Sjamie if (ij == ppr->pr_ip6s) { 1351195974Sjamie error = EPERM; 1352195974Sjamie goto done_deref_locked; 1353195974Sjamie } 1354195974Sjamie if (ip6s > 1) { 1355195974Sjamie for (ii = ij = 1; ii < ip6s; ii++) { 1356195974Sjamie if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1357195974Sjamie &ppr->pr_ip6[0])) 1358195974Sjamie continue; 1359195974Sjamie for (; ij < ppr->pr_ip6s; ij++) 1360195974Sjamie if (IN6_ARE_ADDR_EQUAL( 1361195974Sjamie &ip6[ii], &ppr->pr_ip6[ij])) 1362195974Sjamie break; 1363195974Sjamie if (ij == ppr->pr_ip6s) 1364195974Sjamie break; 1365192895Sjamie } 1366192895Sjamie if (ij == ppr->pr_ip6s) { 1367192895Sjamie error = EPERM; 1368192895Sjamie goto done_deref_locked; 1369192895Sjamie } 1370192895Sjamie } 1371192895Sjamie } 1372195974Sjamie /* Check for conflicting IP addresses. */ 1373195974Sjamie tppr = ppr; 1374195945Sjamie#ifdef VIMAGE 1375195974Sjamie for (; tppr != &prison0; tppr = tppr->pr_parent) 1376195974Sjamie if (tppr->pr_flags & PR_VNET) 1377195974Sjamie break; 1378195945Sjamie#endif 1379195974Sjamie FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1380195974Sjamie if (tpr == pr || 1381195945Sjamie#ifdef VIMAGE 1382195974Sjamie (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1383195945Sjamie#endif 1384195974Sjamie tpr->pr_uref == 0) { 1385192895Sjamie descend = 0; 1386195974Sjamie continue; 1387195974Sjamie } 1388195974Sjamie if (!(tpr->pr_flags & PR_IP6_USER)) 1389195974Sjamie continue; 1390195974Sjamie descend = 0; 1391195974Sjamie if (tpr->pr_ip6 == NULL || 1392195974Sjamie (ip6s == 1 && tpr->pr_ip6s == 1)) 1393195974Sjamie continue; 1394195974Sjamie for (ii = 0; ii < ip6s; ii++) { 1395195974Sjamie if (_prison_check_ip6(tpr, &ip6[ii]) == 0) { 1396195974Sjamie error = EADDRINUSE; 1397195974Sjamie vfs_opterror(opts, 1398195974Sjamie "IPv6 addresses clash"); 1399195974Sjamie goto done_deref_locked; 1400192895Sjamie } 1401192895Sjamie } 1402191673Sjamie } 1403192895Sjamie } 1404191673Sjamie#endif 1405192895Sjamie onamelen = namelen = 0; 1406192895Sjamie if (name != NULL) { 1407191673Sjamie /* Give a default name of the jid. */ 1408191673Sjamie if (name[0] == '\0') 1409191673Sjamie snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1410191673Sjamie else if (strtoul(name, &p, 10) != jid && *p == '\0') { 1411191673Sjamie error = EINVAL; 1412191673Sjamie vfs_opterror(opts, "name cannot be numeric"); 1413192895Sjamie goto done_deref_locked; 1414191673Sjamie } 1415191673Sjamie /* 1416192895Sjamie * Make sure the name isn't too long for the prison or its 1417192895Sjamie * children. 1418191673Sjamie */ 1419192895Sjamie onamelen = strlen(pr->pr_name); 1420192895Sjamie namelen = strlen(name); 1421192895Sjamie if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1422192895Sjamie error = ENAMETOOLONG; 1423192895Sjamie goto done_deref_locked; 1424192895Sjamie } 1425192895Sjamie FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1426192895Sjamie if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1427192895Sjamie sizeof(pr->pr_name)) { 1428192895Sjamie error = ENAMETOOLONG; 1429192895Sjamie goto done_deref_locked; 1430192895Sjamie } 1431192895Sjamie } 1432191673Sjamie } 1433192895Sjamie if (pr_allow & ~ppr->pr_allow) { 1434192895Sjamie error = EPERM; 1435192895Sjamie goto done_deref_locked; 1436192895Sjamie } 1437185435Sbz 1438191673Sjamie /* Set the parameters of the prison. */ 1439191673Sjamie#ifdef INET 1440192895Sjamie redo_ip4 = 0; 1441195974Sjamie if (pr_flags & PR_IP4_USER) { 1442195974Sjamie pr->pr_flags |= PR_IP4; 1443195974Sjamie free(pr->pr_ip4, M_PRISON); 1444195974Sjamie pr->pr_ip4s = ip4s; 1445195974Sjamie pr->pr_ip4 = ip4; 1446195974Sjamie ip4 = NULL; 1447192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1448195945Sjamie#ifdef VIMAGE 1449195945Sjamie if (tpr->pr_flags & PR_VNET) { 1450195945Sjamie descend = 0; 1451195945Sjamie continue; 1452195945Sjamie } 1453195945Sjamie#endif 1454192895Sjamie if (prison_restrict_ip4(tpr, NULL)) { 1455192895Sjamie redo_ip4 = 1; 1456192895Sjamie descend = 0; 1457192895Sjamie } 1458192895Sjamie } 1459185435Sbz } 1460191673Sjamie#endif 1461191673Sjamie#ifdef INET6 1462192895Sjamie redo_ip6 = 0; 1463195974Sjamie if (pr_flags & PR_IP6_USER) { 1464195974Sjamie pr->pr_flags |= PR_IP6; 1465195974Sjamie free(pr->pr_ip6, M_PRISON); 1466195974Sjamie pr->pr_ip6s = ip6s; 1467195974Sjamie pr->pr_ip6 = ip6; 1468195974Sjamie ip6 = NULL; 1469192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1470195945Sjamie#ifdef VIMAGE 1471195945Sjamie if (tpr->pr_flags & PR_VNET) { 1472195945Sjamie descend = 0; 1473195945Sjamie continue; 1474195945Sjamie } 1475195945Sjamie#endif 1476192895Sjamie if (prison_restrict_ip6(tpr, NULL)) { 1477192895Sjamie redo_ip6 = 1; 1478192895Sjamie descend = 0; 1479192895Sjamie } 1480192895Sjamie } 1481191673Sjamie } 1482191673Sjamie#endif 1483192895Sjamie if (gotslevel) { 1484191673Sjamie pr->pr_securelevel = slevel; 1485192895Sjamie /* Set all child jails to be at least this level. */ 1486192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1487192895Sjamie if (tpr->pr_securelevel < slevel) 1488192895Sjamie tpr->pr_securelevel = slevel; 1489192895Sjamie } 1490194762Sjamie if (gotchildmax) { 1491194762Sjamie pr->pr_childmax = childmax; 1492194762Sjamie /* Set all child jails to under this limit. */ 1493194762Sjamie FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) 1494194762Sjamie if (tpr->pr_childmax > childmax - level) 1495194762Sjamie tpr->pr_childmax = childmax > level 1496194762Sjamie ? childmax - level : 0; 1497194762Sjamie } 1498192895Sjamie if (gotenforce) { 1499192895Sjamie pr->pr_enforce_statfs = enforce; 1500192895Sjamie /* Pass this restriction on to the children. */ 1501192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1502192895Sjamie if (tpr->pr_enforce_statfs < enforce) 1503192895Sjamie tpr->pr_enforce_statfs = enforce; 1504192895Sjamie } 1505192895Sjamie if (name != NULL) { 1506192895Sjamie if (ppr == &prison0) 1507192895Sjamie strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1508192895Sjamie else 1509192895Sjamie snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1510192895Sjamie ppr->pr_name, name); 1511192895Sjamie /* Change this component of child names. */ 1512192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1513192895Sjamie bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1514192895Sjamie strlen(tpr->pr_name + onamelen) + 1); 1515192895Sjamie bcopy(pr->pr_name, tpr->pr_name, namelen); 1516192895Sjamie } 1517192895Sjamie } 1518191673Sjamie if (path != NULL) { 1519192895Sjamie /* Try to keep a real-rooted full pathname. */ 1520192895Sjamie if (path[0] == '/' && strcmp(mypr->pr_path, "/")) 1521192895Sjamie snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1522192895Sjamie mypr->pr_path, path); 1523192895Sjamie else 1524192895Sjamie strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1525191673Sjamie pr->pr_root = root; 1526191673Sjamie } 1527193066Sjamie if (PR_HOST & ch_flags & ~pr_flags) { 1528193066Sjamie if (pr->pr_flags & PR_HOST) { 1529193066Sjamie /* 1530193066Sjamie * Copy the parent's host info. As with pr_ip4 above, 1531193066Sjamie * the lack of a lock on the parent is not a problem; 1532193066Sjamie * it is always set with allprison_lock at least 1533193066Sjamie * shared, and is held exclusively here. 1534193066Sjamie */ 1535194118Sjamie strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, 1536194118Sjamie sizeof(pr->pr_hostname)); 1537194118Sjamie strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, 1538194118Sjamie sizeof(pr->pr_domainname)); 1539194118Sjamie strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, 1540194118Sjamie sizeof(pr->pr_hostuuid)); 1541193066Sjamie pr->pr_hostid = pr->pr_parent->pr_hostid; 1542193066Sjamie } 1543193066Sjamie } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1544193066Sjamie /* Set this prison, and any descendants without PR_HOST. */ 1545193066Sjamie if (host != NULL) 1546194118Sjamie strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); 1547193066Sjamie if (domain != NULL) 1548194118Sjamie strlcpy(pr->pr_domainname, domain, 1549194118Sjamie sizeof(pr->pr_domainname)); 1550193066Sjamie if (uuid != NULL) 1551194118Sjamie strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); 1552193066Sjamie if (gothid) 1553193066Sjamie pr->pr_hostid = hid; 1554193066Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1555193066Sjamie if (tpr->pr_flags & PR_HOST) 1556193066Sjamie descend = 0; 1557193066Sjamie else { 1558193066Sjamie if (host != NULL) 1559194118Sjamie strlcpy(tpr->pr_hostname, 1560194118Sjamie pr->pr_hostname, 1561194118Sjamie sizeof(tpr->pr_hostname)); 1562193066Sjamie if (domain != NULL) 1563194118Sjamie strlcpy(tpr->pr_domainname, 1564194118Sjamie pr->pr_domainname, 1565194118Sjamie sizeof(tpr->pr_domainname)); 1566193066Sjamie if (uuid != NULL) 1567194118Sjamie strlcpy(tpr->pr_hostuuid, 1568194118Sjamie pr->pr_hostuuid, 1569194118Sjamie sizeof(tpr->pr_hostuuid)); 1570193066Sjamie if (gothid) 1571193066Sjamie tpr->pr_hostid = hid; 1572193066Sjamie } 1573193066Sjamie } 1574193066Sjamie } 1575192895Sjamie if ((tallow = ch_allow & ~pr_allow)) { 1576192895Sjamie /* Clear allow bits in all children. */ 1577192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1578192895Sjamie tpr->pr_allow &= ~tallow; 1579192895Sjamie } 1580192895Sjamie pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1581191673Sjamie /* 1582191673Sjamie * Persistent prisons get an extra reference, and prisons losing their 1583191673Sjamie * persist flag lose that reference. Only do this for existing prisons 1584191673Sjamie * for now, so new ones will remain unseen until after the module 1585191673Sjamie * handlers have completed. 1586191673Sjamie */ 1587191673Sjamie if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1588191673Sjamie if (pr_flags & PR_PERSIST) { 1589191673Sjamie pr->pr_ref++; 1590191673Sjamie pr->pr_uref++; 1591191673Sjamie } else { 1592191673Sjamie pr->pr_ref--; 1593191673Sjamie pr->pr_uref--; 1594191673Sjamie } 1595191673Sjamie } 1596191673Sjamie pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1597191673Sjamie mtx_unlock(&pr->pr_mtx); 1598185435Sbz 1599192895Sjamie /* Locks may have prevented a complete restriction of child IP 1600192895Sjamie * addresses. If so, allocate some more memory and try again. 1601192895Sjamie */ 1602192895Sjamie#ifdef INET 1603192895Sjamie while (redo_ip4) { 1604192895Sjamie ip4s = pr->pr_ip4s; 1605192895Sjamie ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1606192895Sjamie mtx_lock(&pr->pr_mtx); 1607192895Sjamie redo_ip4 = 0; 1608192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1609195945Sjamie#ifdef VIMAGE 1610195945Sjamie if (tpr->pr_flags & PR_VNET) { 1611195945Sjamie descend = 0; 1612195945Sjamie continue; 1613195945Sjamie } 1614195945Sjamie#endif 1615192895Sjamie if (prison_restrict_ip4(tpr, ip4)) { 1616192895Sjamie if (ip4 != NULL) 1617192895Sjamie ip4 = NULL; 1618192895Sjamie else 1619192895Sjamie redo_ip4 = 1; 1620192895Sjamie } 1621192895Sjamie } 1622192895Sjamie mtx_unlock(&pr->pr_mtx); 1623192895Sjamie } 1624192895Sjamie#endif 1625192895Sjamie#ifdef INET6 1626192895Sjamie while (redo_ip6) { 1627192895Sjamie ip6s = pr->pr_ip6s; 1628192895Sjamie ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1629192895Sjamie mtx_lock(&pr->pr_mtx); 1630192895Sjamie redo_ip6 = 0; 1631192895Sjamie FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1632195945Sjamie#ifdef VIMAGE 1633195945Sjamie if (tpr->pr_flags & PR_VNET) { 1634195945Sjamie descend = 0; 1635195945Sjamie continue; 1636195945Sjamie } 1637195945Sjamie#endif 1638192895Sjamie if (prison_restrict_ip6(tpr, ip6)) { 1639192895Sjamie if (ip6 != NULL) 1640192895Sjamie ip6 = NULL; 1641192895Sjamie else 1642192895Sjamie redo_ip6 = 1; 1643192895Sjamie } 1644192895Sjamie } 1645192895Sjamie mtx_unlock(&pr->pr_mtx); 1646192895Sjamie } 1647192895Sjamie#endif 1648192895Sjamie 1649191673Sjamie /* Let the modules do their work. */ 1650191673Sjamie sx_downgrade(&allprison_lock); 1651191673Sjamie if (created) { 1652191673Sjamie error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1653191673Sjamie if (error) { 1654191673Sjamie prison_deref(pr, PD_LIST_SLOCKED); 1655191673Sjamie goto done_errmsg; 1656191673Sjamie } 1657191673Sjamie } 1658191673Sjamie error = osd_jail_call(pr, PR_METHOD_SET, opts); 1659191673Sjamie if (error) { 1660191673Sjamie prison_deref(pr, created 1661191673Sjamie ? PD_LIST_SLOCKED 1662191673Sjamie : PD_DEREF | PD_LIST_SLOCKED); 1663191673Sjamie goto done_errmsg; 1664191673Sjamie } 1665191673Sjamie 1666191673Sjamie /* Attach this process to the prison if requested. */ 1667191673Sjamie if (flags & JAIL_ATTACH) { 1668191673Sjamie mtx_lock(&pr->pr_mtx); 1669191673Sjamie error = do_jail_attach(td, pr); 1670191673Sjamie if (error) { 1671191673Sjamie vfs_opterror(opts, "attach failed"); 1672191673Sjamie if (!created) 1673191673Sjamie prison_deref(pr, PD_DEREF); 1674191673Sjamie goto done_errmsg; 1675191673Sjamie } 1676191673Sjamie } 1677191673Sjamie 1678191673Sjamie /* 1679191673Sjamie * Now that it is all there, drop the temporary reference from existing 1680191673Sjamie * prisons. Or add a reference to newly created persistent prisons 1681191673Sjamie * (which was not done earlier so that the prison would not be publicly 1682191673Sjamie * visible). 1683191673Sjamie */ 1684191673Sjamie if (!created) { 1685191673Sjamie prison_deref(pr, (flags & JAIL_ATTACH) 1686191673Sjamie ? PD_DEREF 1687191673Sjamie : PD_DEREF | PD_LIST_SLOCKED); 1688191673Sjamie } else { 1689191673Sjamie if (pr_flags & PR_PERSIST) { 1690191673Sjamie mtx_lock(&pr->pr_mtx); 1691191673Sjamie pr->pr_ref++; 1692191673Sjamie pr->pr_uref++; 1693191673Sjamie mtx_unlock(&pr->pr_mtx); 1694191673Sjamie } 1695191673Sjamie if (!(flags & JAIL_ATTACH)) 1696191673Sjamie sx_sunlock(&allprison_lock); 1697191673Sjamie } 1698191673Sjamie td->td_retval[0] = pr->pr_id; 1699191673Sjamie goto done_errmsg; 1700191673Sjamie 1701192895Sjamie done_deref_locked: 1702192895Sjamie prison_deref(pr, created 1703192895Sjamie ? PD_LOCKED | PD_LIST_XLOCKED 1704192895Sjamie : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1705192895Sjamie goto done_releroot; 1706191673Sjamie done_unlock_list: 1707191673Sjamie sx_xunlock(&allprison_lock); 1708191673Sjamie done_releroot: 1709191673Sjamie if (root != NULL) { 1710191673Sjamie vfslocked = VFS_LOCK_GIANT(root->v_mount); 1711191673Sjamie vrele(root); 1712191673Sjamie VFS_UNLOCK_GIANT(vfslocked); 1713191673Sjamie } 1714191673Sjamie done_errmsg: 1715191673Sjamie if (error) { 1716191673Sjamie vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1717191673Sjamie if (errmsg_len > 0) { 1718191673Sjamie errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1719191673Sjamie if (errmsg_pos > 0) { 1720191673Sjamie if (optuio->uio_segflg == UIO_SYSSPACE) 1721191673Sjamie bcopy(errmsg, 1722191673Sjamie optuio->uio_iov[errmsg_pos].iov_base, 1723191673Sjamie errmsg_len); 1724191673Sjamie else 1725191673Sjamie copyout(errmsg, 1726191673Sjamie optuio->uio_iov[errmsg_pos].iov_base, 1727191673Sjamie errmsg_len); 1728191673Sjamie } 1729191673Sjamie } 1730191673Sjamie } 1731191673Sjamie done_free: 1732191673Sjamie#ifdef INET 1733191673Sjamie free(ip4, M_PRISON); 1734191673Sjamie#endif 1735191673Sjamie#ifdef INET6 1736191673Sjamie free(ip6, M_PRISON); 1737191673Sjamie#endif 1738191673Sjamie vfs_freeopts(opts); 1739191673Sjamie return (error); 1740191673Sjamie} 1741191673Sjamie 1742191673Sjamie 174382710Sdillon/* 1744191673Sjamie * struct jail_get_args { 1745191673Sjamie * struct iovec *iovp; 1746191673Sjamie * unsigned int iovcnt; 1747191673Sjamie * int flags; 1748114168Smike * }; 174982710Sdillon */ 175046155Sphkint 1751191673Sjamiejail_get(struct thread *td, struct jail_get_args *uap) 175246155Sphk{ 1753191673Sjamie struct uio *auio; 1754185435Sbz int error; 1755185435Sbz 1756191673Sjamie /* Check that we have an even number of iovecs. */ 1757191673Sjamie if (uap->iovcnt & 1) 1758191673Sjamie return (EINVAL); 1759191673Sjamie 1760191673Sjamie error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1761185435Sbz if (error) 1762185435Sbz return (error); 1763191673Sjamie error = kern_jail_get(td, auio, uap->flags); 1764191673Sjamie if (error == 0) 1765191673Sjamie error = copyout(auio->uio_iov, uap->iovp, 1766191673Sjamie uap->iovcnt * sizeof (struct iovec)); 1767191673Sjamie free(auio, M_IOV); 1768191673Sjamie return (error); 1769191673Sjamie} 1770185435Sbz 1771191673Sjamieint 1772191673Sjamiekern_jail_get(struct thread *td, struct uio *optuio, int flags) 1773191673Sjamie{ 1774192895Sjamie struct prison *pr, *mypr; 1775191673Sjamie struct vfsopt *opt; 1776191673Sjamie struct vfsoptlist *opts; 1777191673Sjamie char *errmsg, *name; 1778192895Sjamie int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; 1779185435Sbz 1780191673Sjamie if (flags & ~JAIL_GET_MASK) 1781191673Sjamie return (EINVAL); 1782185435Sbz 1783191673Sjamie /* Get the parameter list. */ 1784191673Sjamie error = vfs_buildopts(optuio, &opts); 1785191673Sjamie if (error) 1786191673Sjamie return (error); 1787191673Sjamie errmsg_pos = vfs_getopt_pos(opts, "errmsg"); 1788192895Sjamie mypr = td->td_ucred->cr_prison; 1789185435Sbz 1790191673Sjamie /* 1791191673Sjamie * Find the prison specified by one of: lastjid, jid, name. 1792191673Sjamie */ 1793191673Sjamie sx_slock(&allprison_lock); 1794191673Sjamie error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 1795191673Sjamie if (error == 0) { 1796191673Sjamie TAILQ_FOREACH(pr, &allprison, pr_list) { 1797192895Sjamie if (pr->pr_id > jid && prison_ischild(mypr, pr)) { 1798191673Sjamie mtx_lock(&pr->pr_mtx); 1799191673Sjamie if (pr->pr_ref > 0 && 1800191673Sjamie (pr->pr_uref > 0 || (flags & JAIL_DYING))) 1801191673Sjamie break; 1802191673Sjamie mtx_unlock(&pr->pr_mtx); 1803191673Sjamie } 1804191673Sjamie } 1805191673Sjamie if (pr != NULL) 1806191673Sjamie goto found_prison; 1807191673Sjamie error = ENOENT; 1808191673Sjamie vfs_opterror(opts, "no jail after %d", jid); 1809191673Sjamie goto done_unlock_list; 1810191673Sjamie } else if (error != ENOENT) 1811191673Sjamie goto done_unlock_list; 1812185435Sbz 1813191673Sjamie error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 1814191673Sjamie if (error == 0) { 1815191673Sjamie if (jid != 0) { 1816192895Sjamie pr = prison_find_child(mypr, jid); 1817191673Sjamie if (pr != NULL) { 1818191673Sjamie if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1819191673Sjamie mtx_unlock(&pr->pr_mtx); 1820191673Sjamie error = ENOENT; 1821191673Sjamie vfs_opterror(opts, "jail %d is dying", 1822191673Sjamie jid); 1823191673Sjamie goto done_unlock_list; 1824191673Sjamie } 1825191673Sjamie goto found_prison; 1826191673Sjamie } 1827191673Sjamie error = ENOENT; 1828191673Sjamie vfs_opterror(opts, "jail %d not found", jid); 1829191673Sjamie goto done_unlock_list; 1830191673Sjamie } 1831191673Sjamie } else if (error != ENOENT) 1832191673Sjamie goto done_unlock_list; 183346155Sphk 1834191673Sjamie error = vfs_getopt(opts, "name", (void **)&name, &len); 1835191673Sjamie if (error == 0) { 1836191673Sjamie if (len == 0 || name[len - 1] != '\0') { 1837191673Sjamie error = EINVAL; 1838191673Sjamie goto done_unlock_list; 1839191673Sjamie } 1840192895Sjamie pr = prison_find_name(mypr, name); 1841191673Sjamie if (pr != NULL) { 1842191673Sjamie if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1843191673Sjamie mtx_unlock(&pr->pr_mtx); 1844191673Sjamie error = ENOENT; 1845191673Sjamie vfs_opterror(opts, "jail \"%s\" is dying", 1846191673Sjamie name); 1847191673Sjamie goto done_unlock_list; 1848191673Sjamie } 1849191673Sjamie goto found_prison; 1850191673Sjamie } 1851191673Sjamie error = ENOENT; 1852191673Sjamie vfs_opterror(opts, "jail \"%s\" not found", name); 1853191673Sjamie goto done_unlock_list; 1854191673Sjamie } else if (error != ENOENT) 1855191673Sjamie goto done_unlock_list; 1856185435Sbz 1857191673Sjamie vfs_opterror(opts, "no jail specified"); 1858191673Sjamie error = ENOENT; 1859191673Sjamie goto done_unlock_list; 1860191673Sjamie 1861191673Sjamie found_prison: 1862191673Sjamie /* Get the parameters of the prison. */ 1863191673Sjamie pr->pr_ref++; 1864191673Sjamie locked = PD_LOCKED; 1865191673Sjamie td->td_retval[0] = pr->pr_id; 1866191673Sjamie error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 1867191673Sjamie if (error != 0 && error != ENOENT) 1868191673Sjamie goto done_deref; 1869192895Sjamie i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id; 1870192895Sjamie error = vfs_setopt(opts, "parent", &i, sizeof(i)); 1871191673Sjamie if (error != 0 && error != ENOENT) 1872191673Sjamie goto done_deref; 1873192895Sjamie error = vfs_setopts(opts, "name", prison_name(mypr, pr)); 1874192895Sjamie if (error != 0 && error != ENOENT) 1875192895Sjamie goto done_deref; 1876192895Sjamie error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, 1877191673Sjamie sizeof(pr->pr_cpuset->cs_id)); 1878191673Sjamie if (error != 0 && error != ENOENT) 1879191673Sjamie goto done_deref; 1880192895Sjamie error = vfs_setopts(opts, "path", prison_path(mypr, pr)); 1881191673Sjamie if (error != 0 && error != ENOENT) 1882191673Sjamie goto done_deref; 1883191673Sjamie#ifdef INET 1884191673Sjamie error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 1885191673Sjamie pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1886191673Sjamie if (error != 0 && error != ENOENT) 1887191673Sjamie goto done_deref; 1888191673Sjamie#endif 1889191673Sjamie#ifdef INET6 1890191673Sjamie error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 1891191673Sjamie pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1892191673Sjamie if (error != 0 && error != ENOENT) 1893191673Sjamie goto done_deref; 1894191673Sjamie#endif 1895191673Sjamie error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 1896191673Sjamie sizeof(pr->pr_securelevel)); 1897191673Sjamie if (error != 0 && error != ENOENT) 1898191673Sjamie goto done_deref; 1899194762Sjamie error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, 1900194762Sjamie sizeof(pr->pr_childcount)); 1901194762Sjamie if (error != 0 && error != ENOENT) 1902194762Sjamie goto done_deref; 1903194762Sjamie error = vfs_setopt(opts, "children.max", &pr->pr_childmax, 1904194762Sjamie sizeof(pr->pr_childmax)); 1905194762Sjamie if (error != 0 && error != ENOENT) 1906194762Sjamie goto done_deref; 1907194118Sjamie error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); 1908191673Sjamie if (error != 0 && error != ENOENT) 1909191673Sjamie goto done_deref; 1910194118Sjamie error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); 1911193066Sjamie if (error != 0 && error != ENOENT) 1912193066Sjamie goto done_deref; 1913194118Sjamie error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); 1914193066Sjamie if (error != 0 && error != ENOENT) 1915193066Sjamie goto done_deref; 1916193066Sjamie#ifdef COMPAT_IA32 1917193066Sjamie if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 1918193066Sjamie uint32_t hid32 = pr->pr_hostid; 1919193066Sjamie 1920193066Sjamie error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); 1921193066Sjamie } else 1922193066Sjamie#endif 1923193066Sjamie error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, 1924193066Sjamie sizeof(pr->pr_hostid)); 1925193066Sjamie if (error != 0 && error != ENOENT) 1926193066Sjamie goto done_deref; 1927192895Sjamie error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, 1928192895Sjamie sizeof(pr->pr_enforce_statfs)); 1929191673Sjamie if (error != 0 && error != ENOENT) 1930191673Sjamie goto done_deref; 1931192895Sjamie for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 1932192895Sjamie fi++) { 1933192895Sjamie if (pr_flag_names[fi] == NULL) 1934192895Sjamie continue; 1935192895Sjamie i = (pr->pr_flags & (1 << fi)) ? 1 : 0; 1936192895Sjamie error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); 1937192895Sjamie if (error != 0 && error != ENOENT) 1938192895Sjamie goto done_deref; 1939192895Sjamie i = !i; 1940192895Sjamie error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); 1941192895Sjamie if (error != 0 && error != ENOENT) 1942192895Sjamie goto done_deref; 1943192895Sjamie } 1944195870Sjamie for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 1945195870Sjamie fi++) { 1946195870Sjamie i = pr->pr_flags & 1947195870Sjamie (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); 1948195870Sjamie i = pr_flag_jailsys[fi].disable && 1949195870Sjamie (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE 1950195870Sjamie : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW 1951195870Sjamie : JAIL_SYS_INHERIT; 1952195870Sjamie error = 1953195870Sjamie vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i)); 1954195870Sjamie if (error != 0 && error != ENOENT) 1955195870Sjamie goto done_deref; 1956195870Sjamie } 1957192895Sjamie for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 1958192895Sjamie fi++) { 1959192895Sjamie if (pr_allow_names[fi] == NULL) 1960192895Sjamie continue; 1961192895Sjamie i = (pr->pr_allow & (1 << fi)) ? 1 : 0; 1962192895Sjamie error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); 1963192895Sjamie if (error != 0 && error != ENOENT) 1964192895Sjamie goto done_deref; 1965192895Sjamie i = !i; 1966192895Sjamie error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); 1967192895Sjamie if (error != 0 && error != ENOENT) 1968192895Sjamie goto done_deref; 1969192895Sjamie } 1970191673Sjamie i = (pr->pr_uref == 0); 1971191673Sjamie error = vfs_setopt(opts, "dying", &i, sizeof(i)); 1972191673Sjamie if (error != 0 && error != ENOENT) 1973191673Sjamie goto done_deref; 1974191673Sjamie i = !i; 1975191673Sjamie error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 1976191673Sjamie if (error != 0 && error != ENOENT) 1977191673Sjamie goto done_deref; 1978191673Sjamie 1979191673Sjamie /* Get the module parameters. */ 1980191673Sjamie mtx_unlock(&pr->pr_mtx); 1981191673Sjamie locked = 0; 1982191673Sjamie error = osd_jail_call(pr, PR_METHOD_GET, opts); 198346155Sphk if (error) 1984191673Sjamie goto done_deref; 1985191673Sjamie prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 198684828Sjhb 1987191673Sjamie /* By now, all parameters should have been noted. */ 1988191673Sjamie TAILQ_FOREACH(opt, opts, link) { 1989191673Sjamie if (!opt->seen && strcmp(opt->name, "errmsg")) { 1990191673Sjamie error = EINVAL; 1991191673Sjamie vfs_opterror(opts, "unknown parameter: %s", opt->name); 1992191673Sjamie goto done_errmsg; 1993191673Sjamie } 1994185435Sbz } 1995191673Sjamie 1996191673Sjamie /* Write the fetched parameters back to userspace. */ 1997191673Sjamie error = 0; 1998191673Sjamie TAILQ_FOREACH(opt, opts, link) { 1999191673Sjamie if (opt->pos >= 0 && opt->pos != errmsg_pos) { 2000191673Sjamie pos = 2 * opt->pos + 1; 2001191673Sjamie optuio->uio_iov[pos].iov_len = opt->len; 2002191673Sjamie if (opt->value != NULL) { 2003191673Sjamie if (optuio->uio_segflg == UIO_SYSSPACE) { 2004191673Sjamie bcopy(opt->value, 2005191673Sjamie optuio->uio_iov[pos].iov_base, 2006191673Sjamie opt->len); 2007191673Sjamie } else { 2008191673Sjamie error = copyout(opt->value, 2009191673Sjamie optuio->uio_iov[pos].iov_base, 2010191673Sjamie opt->len); 2011191673Sjamie if (error) 2012191673Sjamie break; 2013191673Sjamie } 2014191673Sjamie } 2015191673Sjamie } 2016185435Sbz } 2017191673Sjamie goto done_errmsg; 2018191673Sjamie 2019191673Sjamie done_deref: 2020191673Sjamie prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 2021191673Sjamie goto done_errmsg; 2022191673Sjamie 2023191673Sjamie done_unlock_list: 2024191673Sjamie sx_sunlock(&allprison_lock); 2025191673Sjamie done_errmsg: 2026191673Sjamie if (error && errmsg_pos >= 0) { 2027191673Sjamie vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 2028191673Sjamie errmsg_pos = 2 * errmsg_pos + 1; 2029191673Sjamie if (errmsg_len > 0) { 2030191673Sjamie if (optuio->uio_segflg == UIO_SYSSPACE) 2031191673Sjamie bcopy(errmsg, 2032191673Sjamie optuio->uio_iov[errmsg_pos].iov_base, 2033191673Sjamie errmsg_len); 2034191673Sjamie else 2035191673Sjamie copyout(errmsg, 2036191673Sjamie optuio->uio_iov[errmsg_pos].iov_base, 2037191673Sjamie errmsg_len); 2038191673Sjamie } 2039185435Sbz } 2040191673Sjamie vfs_freeopts(opts); 2041191673Sjamie return (error); 2042191673Sjamie} 2043113275Smike 2044192895Sjamie 2045191673Sjamie/* 2046191673Sjamie * struct jail_remove_args { 2047191673Sjamie * int jid; 2048191673Sjamie * }; 2049191673Sjamie */ 2050191673Sjamieint 2051191673Sjamiejail_remove(struct thread *td, struct jail_remove_args *uap) 2052191673Sjamie{ 2053192895Sjamie struct prison *pr, *cpr, *lpr, *tpr; 2054192895Sjamie int descend, error; 2055185435Sbz 2056191673Sjamie error = priv_check(td, PRIV_JAIL_REMOVE); 2057185435Sbz if (error) 2058191673Sjamie return (error); 2059185435Sbz 2060185435Sbz sx_xlock(&allprison_lock); 2061192895Sjamie pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2062191673Sjamie if (pr == NULL) { 2063185435Sbz sx_xunlock(&allprison_lock); 2064191673Sjamie return (EINVAL); 2065185435Sbz } 2066185435Sbz 2067192895Sjamie /* Remove all descendants of this prison, then remove this prison. */ 2068192895Sjamie pr->pr_ref++; 2069192895Sjamie pr->pr_flags |= PR_REMOVE; 2070192895Sjamie if (!LIST_EMPTY(&pr->pr_children)) { 2071192895Sjamie mtx_unlock(&pr->pr_mtx); 2072192895Sjamie lpr = NULL; 2073192895Sjamie FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 2074192895Sjamie mtx_lock(&cpr->pr_mtx); 2075192895Sjamie if (cpr->pr_ref > 0) { 2076192895Sjamie tpr = cpr; 2077192895Sjamie cpr->pr_ref++; 2078192895Sjamie cpr->pr_flags |= PR_REMOVE; 2079192895Sjamie } else { 2080192895Sjamie /* Already removed - do not do it again. */ 2081192895Sjamie tpr = NULL; 2082192895Sjamie } 2083192895Sjamie mtx_unlock(&cpr->pr_mtx); 2084192895Sjamie if (lpr != NULL) { 2085192895Sjamie mtx_lock(&lpr->pr_mtx); 2086192895Sjamie prison_remove_one(lpr); 2087192895Sjamie sx_xlock(&allprison_lock); 2088192895Sjamie } 2089192895Sjamie lpr = tpr; 2090192895Sjamie } 2091192895Sjamie if (lpr != NULL) { 2092192895Sjamie mtx_lock(&lpr->pr_mtx); 2093192895Sjamie prison_remove_one(lpr); 2094192895Sjamie sx_xlock(&allprison_lock); 2095192895Sjamie } 2096192895Sjamie mtx_lock(&pr->pr_mtx); 2097192895Sjamie } 2098192895Sjamie prison_remove_one(pr); 2099192895Sjamie return (0); 2100192895Sjamie} 2101192895Sjamie 2102192895Sjamiestatic void 2103192895Sjamieprison_remove_one(struct prison *pr) 2104192895Sjamie{ 2105192895Sjamie struct proc *p; 2106192895Sjamie int deuref; 2107192895Sjamie 2108191673Sjamie /* If the prison was persistent, it is not anymore. */ 2109191673Sjamie deuref = 0; 2110191673Sjamie if (pr->pr_flags & PR_PERSIST) { 2111191673Sjamie pr->pr_ref--; 2112191673Sjamie deuref = PD_DEUREF; 2113191673Sjamie pr->pr_flags &= ~PR_PERSIST; 2114179881Sdelphij } 2115113275Smike 2116192895Sjamie /* 2117192895Sjamie * jail_remove added a reference. If that's the only one, remove 2118192895Sjamie * the prison now. 2119192895Sjamie */ 2120192895Sjamie KASSERT(pr->pr_ref > 0, 2121192895Sjamie ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); 2122192895Sjamie if (pr->pr_ref == 1) { 2123191673Sjamie prison_deref(pr, 2124191673Sjamie deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 2125192895Sjamie return; 2126191673Sjamie } 2127191673Sjamie 2128113275Smike mtx_unlock(&pr->pr_mtx); 2129191673Sjamie sx_xunlock(&allprison_lock); 2130191673Sjamie /* 2131191673Sjamie * Kill all processes unfortunate enough to be attached to this prison. 2132191673Sjamie */ 2133191673Sjamie sx_slock(&allproc_lock); 2134191673Sjamie LIST_FOREACH(p, &allproc, p_list) { 2135191673Sjamie PROC_LOCK(p); 2136191673Sjamie if (p->p_state != PRS_NEW && p->p_ucred && 2137191673Sjamie p->p_ucred->cr_prison == pr) 2138191673Sjamie psignal(p, SIGKILL); 2139191673Sjamie PROC_UNLOCK(p); 2140191673Sjamie } 2141191673Sjamie sx_sunlock(&allproc_lock); 2142192895Sjamie /* Remove the temporary reference added by jail_remove. */ 2143191673Sjamie prison_deref(pr, deuref | PD_DEREF); 2144113275Smike} 2145113275Smike 2146190466Sjamie 2147113275Smike/* 2148114168Smike * struct jail_attach_args { 2149114168Smike * int jid; 2150114168Smike * }; 2151113275Smike */ 2152113275Smikeint 2153114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap) 2154113275Smike{ 2155113275Smike struct prison *pr; 2156191673Sjamie int error; 2157167309Spjd 2158164032Srwatson error = priv_check(td, PRIV_JAIL_ATTACH); 2159126023Snectar if (error) 2160126023Snectar return (error); 2161126023Snectar 2162168401Spjd sx_slock(&allprison_lock); 2163192895Sjamie pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2164113275Smike if (pr == NULL) { 2165168401Spjd sx_sunlock(&allprison_lock); 2166113275Smike return (EINVAL); 2167113275Smike } 2168185435Sbz 2169185435Sbz /* 2170185435Sbz * Do not allow a process to attach to a prison that is not 2171191673Sjamie * considered to be "alive". 2172185435Sbz */ 2173191673Sjamie if (pr->pr_uref == 0) { 2174185435Sbz mtx_unlock(&pr->pr_mtx); 2175185435Sbz sx_sunlock(&allprison_lock); 2176185435Sbz return (EINVAL); 2177185435Sbz } 2178191673Sjamie 2179191673Sjamie return (do_jail_attach(td, pr)); 2180191673Sjamie} 2181191673Sjamie 2182191673Sjamiestatic int 2183191673Sjamiedo_jail_attach(struct thread *td, struct prison *pr) 2184191673Sjamie{ 2185192895Sjamie struct prison *ppr; 2186191673Sjamie struct proc *p; 2187191673Sjamie struct ucred *newcred, *oldcred; 2188191673Sjamie int vfslocked, error; 2189191673Sjamie 2190191673Sjamie /* 2191191673Sjamie * XXX: Note that there is a slight race here if two threads 2192191673Sjamie * in the same privileged process attempt to attach to two 2193191673Sjamie * different jails at the same time. It is important for 2194191673Sjamie * user processes not to do this, or they might end up with 2195191673Sjamie * a process root from one prison, but attached to the jail 2196191673Sjamie * of another. 2197191673Sjamie */ 2198113275Smike pr->pr_ref++; 2199191673Sjamie pr->pr_uref++; 2200113275Smike mtx_unlock(&pr->pr_mtx); 2201191673Sjamie 2202191673Sjamie /* Let modules do whatever they need to prepare for attaching. */ 2203191673Sjamie error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 2204191673Sjamie if (error) { 2205191673Sjamie prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 2206191673Sjamie return (error); 2207191673Sjamie } 2208168401Spjd sx_sunlock(&allprison_lock); 2209113275Smike 2210185435Sbz /* 2211185435Sbz * Reparent the newly attached process to this jail. 2212185435Sbz */ 2213192895Sjamie ppr = td->td_ucred->cr_prison; 2214191673Sjamie p = td->td_proc; 2215185435Sbz error = cpuset_setproc_update_set(p, pr->pr_cpuset); 2216185435Sbz if (error) 2217191673Sjamie goto e_revert_osd; 2218185435Sbz 2219150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2220175202Sattilio vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 2221113275Smike if ((error = change_dir(pr->pr_root, td)) != 0) 2222113275Smike goto e_unlock; 2223113275Smike#ifdef MAC 2224172930Srwatson if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 2225113275Smike goto e_unlock; 2226113275Smike#endif 2227175294Sattilio VOP_UNLOCK(pr->pr_root, 0); 2228191673Sjamie if ((error = change_root(pr->pr_root, td))) 2229191673Sjamie goto e_unlock_giant; 2230150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 2231113275Smike 223284828Sjhb newcred = crget(); 223384828Sjhb PROC_LOCK(p); 223484828Sjhb oldcred = p->p_ucred; 2235113275Smike setsugid(p); 223684828Sjhb crcopy(newcred, oldcred); 2237113630Sjhb newcred->cr_prison = pr; 223884828Sjhb p->p_ucred = newcred; 223984828Sjhb PROC_UNLOCK(p); 224084828Sjhb crfree(oldcred); 2241192895Sjamie prison_deref(ppr, PD_DEREF | PD_DEUREF); 224246155Sphk return (0); 2243191673Sjamie e_unlock: 2244175294Sattilio VOP_UNLOCK(pr->pr_root, 0); 2245191673Sjamie e_unlock_giant: 2246150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 2247191673Sjamie e_revert_osd: 2248191673Sjamie /* Tell modules this thread is still in its old jail after all. */ 2249192895Sjamie (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); 2250191673Sjamie prison_deref(pr, PD_DEREF | PD_DEUREF); 225146155Sphk return (error); 225246155Sphk} 225346155Sphk 2254192895Sjamie 2255113275Smike/* 2256113275Smike * Returns a locked prison instance, or NULL on failure. 2257113275Smike */ 2258168399Spjdstruct prison * 2259113275Smikeprison_find(int prid) 2260113275Smike{ 2261113275Smike struct prison *pr; 2262113275Smike 2263168401Spjd sx_assert(&allprison_lock, SX_LOCKED); 2264191673Sjamie TAILQ_FOREACH(pr, &allprison, pr_list) { 2265113275Smike if (pr->pr_id == prid) { 2266113275Smike mtx_lock(&pr->pr_mtx); 2267191673Sjamie if (pr->pr_ref > 0) 2268191673Sjamie return (pr); 2269191673Sjamie mtx_unlock(&pr->pr_mtx); 2270113275Smike } 2271113275Smike } 2272113275Smike return (NULL); 2273113275Smike} 2274113275Smike 2275191673Sjamie/* 2276192895Sjamie * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 2277191673Sjamie */ 2278191673Sjamiestruct prison * 2279192895Sjamieprison_find_child(struct prison *mypr, int prid) 2280191673Sjamie{ 2281192895Sjamie struct prison *pr; 2282192895Sjamie int descend; 2283192895Sjamie 2284192895Sjamie sx_assert(&allprison_lock, SX_LOCKED); 2285192895Sjamie FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2286192895Sjamie if (pr->pr_id == prid) { 2287192895Sjamie mtx_lock(&pr->pr_mtx); 2288192895Sjamie if (pr->pr_ref > 0) 2289192895Sjamie return (pr); 2290192895Sjamie mtx_unlock(&pr->pr_mtx); 2291192895Sjamie } 2292192895Sjamie } 2293192895Sjamie return (NULL); 2294192895Sjamie} 2295192895Sjamie 2296192895Sjamie/* 2297192895Sjamie * Look for the name relative to mypr. Returns a locked prison or NULL. 2298192895Sjamie */ 2299192895Sjamiestruct prison * 2300192895Sjamieprison_find_name(struct prison *mypr, const char *name) 2301192895Sjamie{ 2302191673Sjamie struct prison *pr, *deadpr; 2303192895Sjamie size_t mylen; 2304192895Sjamie int descend; 2305191673Sjamie 2306191673Sjamie sx_assert(&allprison_lock, SX_LOCKED); 2307192895Sjamie mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2308191673Sjamie again: 2309191673Sjamie deadpr = NULL; 2310192895Sjamie FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2311192895Sjamie if (!strcmp(pr->pr_name + mylen, name)) { 2312191673Sjamie mtx_lock(&pr->pr_mtx); 2313191673Sjamie if (pr->pr_ref > 0) { 2314191673Sjamie if (pr->pr_uref > 0) 2315191673Sjamie return (pr); 2316191673Sjamie deadpr = pr; 2317191673Sjamie } 2318191673Sjamie mtx_unlock(&pr->pr_mtx); 2319191673Sjamie } 2320191673Sjamie } 2321192895Sjamie /* There was no valid prison - perhaps there was a dying one. */ 2322191673Sjamie if (deadpr != NULL) { 2323191673Sjamie mtx_lock(&deadpr->pr_mtx); 2324191673Sjamie if (deadpr->pr_ref == 0) { 2325191673Sjamie mtx_unlock(&deadpr->pr_mtx); 2326191673Sjamie goto again; 2327191673Sjamie } 2328191673Sjamie } 2329191673Sjamie return (deadpr); 2330191673Sjamie} 2331191673Sjamie 2332191673Sjamie/* 2333192895Sjamie * See if a prison has the specific flag set. 2334192895Sjamie */ 2335192895Sjamieint 2336192895Sjamieprison_flag(struct ucred *cred, unsigned flag) 2337192895Sjamie{ 2338192895Sjamie 2339192895Sjamie /* This is an atomic read, so no locking is necessary. */ 2340192895Sjamie return (cred->cr_prison->pr_flags & flag); 2341192895Sjamie} 2342192895Sjamie 2343192895Sjamieint 2344192895Sjamieprison_allow(struct ucred *cred, unsigned flag) 2345192895Sjamie{ 2346192895Sjamie 2347192895Sjamie /* This is an atomic read, so no locking is necessary. */ 2348192895Sjamie return (cred->cr_prison->pr_allow & flag); 2349192895Sjamie} 2350192895Sjamie 2351192895Sjamie/* 2352191673Sjamie * Remove a prison reference. If that was the last reference, remove the 2353191673Sjamie * prison itself - but not in this context in case there are locks held. 2354191673Sjamie */ 235572786Srwatsonvoid 2356185029Spjdprison_free_locked(struct prison *pr) 235772786Srwatson{ 235872786Srwatson 2359185029Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 236072786Srwatson pr->pr_ref--; 236172786Srwatson if (pr->pr_ref == 0) { 2362168483Spjd mtx_unlock(&pr->pr_mtx); 2363124882Srwatson TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 2364144660Sjeff taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 236587275Srwatson return; 236672786Srwatson } 236787275Srwatson mtx_unlock(&pr->pr_mtx); 236872786Srwatson} 236972786Srwatson 2370185029Spjdvoid 2371185029Spjdprison_free(struct prison *pr) 2372185029Spjd{ 2373185029Spjd 2374185029Spjd mtx_lock(&pr->pr_mtx); 2375185029Spjd prison_free_locked(pr); 2376185029Spjd} 2377185029Spjd 2378124882Srwatsonstatic void 2379124882Srwatsonprison_complete(void *context, int pending) 2380124882Srwatson{ 2381191673Sjamie 2382191673Sjamie prison_deref((struct prison *)context, 0); 2383191673Sjamie} 2384191673Sjamie 2385191673Sjamie/* 2386191673Sjamie * Remove a prison reference (usually). This internal version assumes no 2387191673Sjamie * mutexes are held, except perhaps the prison itself. If there are no more 2388191673Sjamie * references, release and delist the prison. On completion, the prison lock 2389191673Sjamie * and the allprison lock are both unlocked. 2390191673Sjamie */ 2391191673Sjamiestatic void 2392191673Sjamieprison_deref(struct prison *pr, int flags) 2393191673Sjamie{ 2394192895Sjamie struct prison *ppr, *tpr; 2395150652Scsjp int vfslocked; 2396124882Srwatson 2397191673Sjamie if (!(flags & PD_LOCKED)) 2398191673Sjamie mtx_lock(&pr->pr_mtx); 2399192895Sjamie /* Decrement the user references in a separate loop. */ 2400191673Sjamie if (flags & PD_DEUREF) { 2401192895Sjamie for (tpr = pr;; tpr = tpr->pr_parent) { 2402192895Sjamie if (tpr != pr) 2403192895Sjamie mtx_lock(&tpr->pr_mtx); 2404192895Sjamie if (--tpr->pr_uref > 0) 2405192895Sjamie break; 2406192895Sjamie KASSERT(tpr != &prison0, ("prison0 pr_uref=0")); 2407192895Sjamie mtx_unlock(&tpr->pr_mtx); 2408192895Sjamie } 2409191673Sjamie /* Done if there were only user references to remove. */ 2410191673Sjamie if (!(flags & PD_DEREF)) { 2411192895Sjamie mtx_unlock(&tpr->pr_mtx); 2412191673Sjamie if (flags & PD_LIST_SLOCKED) 2413191673Sjamie sx_sunlock(&allprison_lock); 2414191673Sjamie else if (flags & PD_LIST_XLOCKED) 2415191673Sjamie sx_xunlock(&allprison_lock); 2416191673Sjamie return; 2417191673Sjamie } 2418192895Sjamie if (tpr != pr) { 2419192895Sjamie mtx_unlock(&tpr->pr_mtx); 2420192895Sjamie mtx_lock(&pr->pr_mtx); 2421192895Sjamie } 2422191673Sjamie } 2423124882Srwatson 2424192895Sjamie for (;;) { 2425192895Sjamie if (flags & PD_DEREF) 2426192895Sjamie pr->pr_ref--; 2427192895Sjamie /* If the prison still has references, nothing else to do. */ 2428192895Sjamie if (pr->pr_ref > 0) { 2429192895Sjamie mtx_unlock(&pr->pr_mtx); 2430192895Sjamie if (flags & PD_LIST_SLOCKED) 2431192895Sjamie sx_sunlock(&allprison_lock); 2432192895Sjamie else if (flags & PD_LIST_XLOCKED) 2433192895Sjamie sx_xunlock(&allprison_lock); 2434192895Sjamie return; 2435191673Sjamie } 2436191673Sjamie 2437192895Sjamie mtx_unlock(&pr->pr_mtx); 2438192895Sjamie if (flags & PD_LIST_SLOCKED) { 2439192895Sjamie if (!sx_try_upgrade(&allprison_lock)) { 2440192895Sjamie sx_sunlock(&allprison_lock); 2441192895Sjamie sx_xlock(&allprison_lock); 2442192895Sjamie } 2443192895Sjamie } else if (!(flags & PD_LIST_XLOCKED)) 2444192895Sjamie sx_xlock(&allprison_lock); 2445168489Spjd 2446192895Sjamie TAILQ_REMOVE(&allprison, pr, pr_list); 2447192895Sjamie LIST_REMOVE(pr, pr_sibling); 2448192895Sjamie ppr = pr->pr_parent; 2449192895Sjamie for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 2450194762Sjamie tpr->pr_childcount--; 2451192895Sjamie sx_downgrade(&allprison_lock); 2452192895Sjamie 2453194251Sjamie#ifdef VIMAGE 2454194251Sjamie if (pr->pr_flags & PR_VNET) 2455194251Sjamie vnet_destroy(pr->pr_vnet); 2456194251Sjamie#endif 2457192895Sjamie if (pr->pr_root != NULL) { 2458192895Sjamie vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2459192895Sjamie vrele(pr->pr_root); 2460192895Sjamie VFS_UNLOCK_GIANT(vfslocked); 2461192895Sjamie } 2462192895Sjamie mtx_destroy(&pr->pr_mtx); 2463191673Sjamie#ifdef INET 2464192895Sjamie free(pr->pr_ip4, M_PRISON); 2465191673Sjamie#endif 2466185435Sbz#ifdef INET6 2467192895Sjamie free(pr->pr_ip6, M_PRISON); 2468185435Sbz#endif 2469192895Sjamie if (pr->pr_cpuset != NULL) 2470192895Sjamie cpuset_rel(pr->pr_cpuset); 2471192895Sjamie osd_jail_exit(pr); 2472192895Sjamie free(pr, M_PRISON); 2473192895Sjamie 2474192895Sjamie /* Removing a prison frees a reference on its parent. */ 2475192895Sjamie pr = ppr; 2476192895Sjamie mtx_lock(&pr->pr_mtx); 2477192895Sjamie flags = PD_DEREF | PD_LIST_SLOCKED; 2478192895Sjamie } 2479124882Srwatson} 2480124882Srwatson 248172786Srwatsonvoid 2482185029Spjdprison_hold_locked(struct prison *pr) 248372786Srwatson{ 248472786Srwatson 2485185029Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 2486168489Spjd KASSERT(pr->pr_ref > 0, 2487191671Sjamie ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 248872786Srwatson pr->pr_ref++; 2489185029Spjd} 2490185029Spjd 2491185029Spjdvoid 2492185029Spjdprison_hold(struct prison *pr) 2493185029Spjd{ 2494185029Spjd 2495185029Spjd mtx_lock(&pr->pr_mtx); 2496185029Spjd prison_hold_locked(pr); 249787275Srwatson mtx_unlock(&pr->pr_mtx); 249872786Srwatson} 249972786Srwatson 2500185435Sbzvoid 2501185435Sbzprison_proc_hold(struct prison *pr) 250287275Srwatson{ 250387275Srwatson 2504185435Sbz mtx_lock(&pr->pr_mtx); 2505191673Sjamie KASSERT(pr->pr_uref > 0, 2506191673Sjamie ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 2507191673Sjamie pr->pr_uref++; 2508185435Sbz mtx_unlock(&pr->pr_mtx); 250987275Srwatson} 251087275Srwatson 2511185435Sbzvoid 2512185435Sbzprison_proc_free(struct prison *pr) 2513185435Sbz{ 2514185435Sbz 2515185435Sbz mtx_lock(&pr->pr_mtx); 2516191673Sjamie KASSERT(pr->pr_uref > 0, 2517191673Sjamie ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 2518191673Sjamie prison_deref(pr, PD_DEUREF | PD_LOCKED); 2519185435Sbz} 2520185435Sbz 2521185435Sbz 2522185435Sbz#ifdef INET 2523185435Sbz/* 2524192895Sjamie * Restrict a prison's IP address list with its parent's, possibly replacing 2525192895Sjamie * it. Return true if the replacement buffer was used (or would have been). 2526192895Sjamie */ 2527192895Sjamiestatic int 2528192895Sjamieprison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 2529192895Sjamie{ 2530192895Sjamie int ii, ij, used; 2531192895Sjamie struct prison *ppr; 2532192895Sjamie 2533192895Sjamie ppr = pr->pr_parent; 2534192895Sjamie if (!(pr->pr_flags & PR_IP4_USER)) { 2535192895Sjamie /* This has no user settings, so just copy the parent's list. */ 2536192895Sjamie if (pr->pr_ip4s < ppr->pr_ip4s) { 2537192895Sjamie /* 2538192895Sjamie * There's no room for the parent's list. Use the 2539192895Sjamie * new list buffer, which is assumed to be big enough 2540192895Sjamie * (if it was passed). If there's no buffer, try to 2541192895Sjamie * allocate one. 2542192895Sjamie */ 2543192895Sjamie used = 1; 2544192895Sjamie if (newip4 == NULL) { 2545192895Sjamie newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 2546192895Sjamie M_PRISON, M_NOWAIT); 2547192895Sjamie if (newip4 != NULL) 2548192895Sjamie used = 0; 2549192895Sjamie } 2550192895Sjamie if (newip4 != NULL) { 2551192895Sjamie bcopy(ppr->pr_ip4, newip4, 2552192895Sjamie ppr->pr_ip4s * sizeof(*newip4)); 2553192895Sjamie free(pr->pr_ip4, M_PRISON); 2554192895Sjamie pr->pr_ip4 = newip4; 2555192895Sjamie pr->pr_ip4s = ppr->pr_ip4s; 2556192895Sjamie } 2557192895Sjamie return (used); 2558192895Sjamie } 2559192895Sjamie pr->pr_ip4s = ppr->pr_ip4s; 2560192895Sjamie if (pr->pr_ip4s > 0) 2561192895Sjamie bcopy(ppr->pr_ip4, pr->pr_ip4, 2562192895Sjamie pr->pr_ip4s * sizeof(*newip4)); 2563192895Sjamie else if (pr->pr_ip4 != NULL) { 2564192895Sjamie free(pr->pr_ip4, M_PRISON); 2565192895Sjamie pr->pr_ip4 = NULL; 2566192895Sjamie } 2567195974Sjamie } else if (pr->pr_ip4s > 0) { 2568192895Sjamie /* Remove addresses that aren't in the parent. */ 2569192895Sjamie for (ij = 0; ij < ppr->pr_ip4s; ij++) 2570192895Sjamie if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 2571192895Sjamie break; 2572192895Sjamie if (ij < ppr->pr_ip4s) 2573192895Sjamie ii = 1; 2574192895Sjamie else { 2575192895Sjamie bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 2576192895Sjamie --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2577192895Sjamie ii = 0; 2578192895Sjamie } 2579192895Sjamie for (ij = 1; ii < pr->pr_ip4s; ) { 2580192895Sjamie if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 2581192895Sjamie ii++; 2582192895Sjamie continue; 2583192895Sjamie } 2584192895Sjamie switch (ij >= ppr->pr_ip4s ? -1 : 2585192895Sjamie qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 2586192895Sjamie case -1: 2587192895Sjamie bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 2588192895Sjamie (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 2589192895Sjamie break; 2590192895Sjamie case 0: 2591192895Sjamie ii++; 2592192895Sjamie ij++; 2593192895Sjamie break; 2594192895Sjamie case 1: 2595192895Sjamie ij++; 2596192895Sjamie break; 2597192895Sjamie } 2598192895Sjamie } 2599192895Sjamie if (pr->pr_ip4s == 0) { 2600195870Sjamie pr->pr_flags |= PR_IP4_DISABLE; 2601192895Sjamie free(pr->pr_ip4, M_PRISON); 2602192895Sjamie pr->pr_ip4 = NULL; 2603192895Sjamie } 2604192895Sjamie } 2605192895Sjamie return (0); 2606192895Sjamie} 2607192895Sjamie 2608192895Sjamie/* 2609185435Sbz * Pass back primary IPv4 address of this jail. 2610185435Sbz * 2611192895Sjamie * If not restricted return success but do not alter the address. Caller has 2612192895Sjamie * to make sure to initialize it correctly (e.g. INADDR_ANY). 2613185435Sbz * 2614188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2615188144Sjamie * Address returned in NBO. 2616185435Sbz */ 261746155Sphkint 2618187684Sbzprison_get_ip4(struct ucred *cred, struct in_addr *ia) 261946155Sphk{ 2620191673Sjamie struct prison *pr; 262146155Sphk 2622185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2623185435Sbz KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2624185435Sbz 2625192895Sjamie pr = cred->cr_prison; 2626192895Sjamie if (!(pr->pr_flags & PR_IP4)) 262746155Sphk return (0); 2628191673Sjamie mtx_lock(&pr->pr_mtx); 2629192895Sjamie if (!(pr->pr_flags & PR_IP4)) { 2630192895Sjamie mtx_unlock(&pr->pr_mtx); 2631192895Sjamie return (0); 2632192895Sjamie } 2633191673Sjamie if (pr->pr_ip4 == NULL) { 2634191673Sjamie mtx_unlock(&pr->pr_mtx); 2635188144Sjamie return (EAFNOSUPPORT); 2636191673Sjamie } 2637185435Sbz 2638191673Sjamie ia->s_addr = pr->pr_ip4[0].s_addr; 2639191673Sjamie mtx_unlock(&pr->pr_mtx); 2640185435Sbz return (0); 2641185435Sbz} 2642185435Sbz 2643185435Sbz/* 2644192895Sjamie * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2645192895Sjamie */ 2646192895Sjamieint 2647192895Sjamieprison_equal_ip4(struct prison *pr1, struct prison *pr2) 2648192895Sjamie{ 2649192895Sjamie 2650192895Sjamie if (pr1 == pr2) 2651192895Sjamie return (1); 2652192895Sjamie 2653192895Sjamie /* 2654195974Sjamie * No need to lock since the PR_IP4_USER flag can't be altered for 2655195974Sjamie * existing prisons. 2656192895Sjamie */ 2657195945Sjamie while (pr1 != &prison0 && 2658195945Sjamie#ifdef VIMAGE 2659195945Sjamie !(pr1->pr_flags & PR_VNET) && 2660195945Sjamie#endif 2661195945Sjamie !(pr1->pr_flags & PR_IP4_USER)) 2662192895Sjamie pr1 = pr1->pr_parent; 2663195945Sjamie while (pr2 != &prison0 && 2664195945Sjamie#ifdef VIMAGE 2665195945Sjamie !(pr2->pr_flags & PR_VNET) && 2666195945Sjamie#endif 2667195945Sjamie !(pr2->pr_flags & PR_IP4_USER)) 2668192895Sjamie pr2 = pr2->pr_parent; 2669192895Sjamie return (pr1 == pr2); 2670192895Sjamie} 2671192895Sjamie 2672192895Sjamie/* 2673185435Sbz * Make sure our (source) address is set to something meaningful to this 2674185435Sbz * jail. 2675185435Sbz * 2676192895Sjamie * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2677192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2678192895Sjamie * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 2679185435Sbz */ 2680185435Sbzint 2681185435Sbzprison_local_ip4(struct ucred *cred, struct in_addr *ia) 2682185435Sbz{ 2683191673Sjamie struct prison *pr; 2684185435Sbz struct in_addr ia0; 2685191673Sjamie int error; 2686185435Sbz 2687185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2688185435Sbz KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2689185435Sbz 2690192895Sjamie pr = cred->cr_prison; 2691192895Sjamie if (!(pr->pr_flags & PR_IP4)) 269246155Sphk return (0); 2693191673Sjamie mtx_lock(&pr->pr_mtx); 2694192895Sjamie if (!(pr->pr_flags & PR_IP4)) { 2695192895Sjamie mtx_unlock(&pr->pr_mtx); 2696192895Sjamie return (0); 2697192895Sjamie } 2698191673Sjamie if (pr->pr_ip4 == NULL) { 2699191673Sjamie mtx_unlock(&pr->pr_mtx); 2700188144Sjamie return (EAFNOSUPPORT); 2701191673Sjamie } 2702185435Sbz 2703185435Sbz ia0.s_addr = ntohl(ia->s_addr); 2704185435Sbz if (ia0.s_addr == INADDR_LOOPBACK) { 2705191673Sjamie ia->s_addr = pr->pr_ip4[0].s_addr; 2706191673Sjamie mtx_unlock(&pr->pr_mtx); 2707185435Sbz return (0); 270846155Sphk } 2709185435Sbz 2710188144Sjamie if (ia0.s_addr == INADDR_ANY) { 2711188144Sjamie /* 2712188144Sjamie * In case there is only 1 IPv4 address, bind directly. 2713188144Sjamie */ 2714191673Sjamie if (pr->pr_ip4s == 1) 2715191673Sjamie ia->s_addr = pr->pr_ip4[0].s_addr; 2716191673Sjamie mtx_unlock(&pr->pr_mtx); 2717185435Sbz return (0); 2718185435Sbz } 2719185435Sbz 2720191673Sjamie error = _prison_check_ip4(pr, ia); 2721191673Sjamie mtx_unlock(&pr->pr_mtx); 2722191673Sjamie return (error); 2723185435Sbz} 2724185435Sbz 2725185435Sbz/* 2726185435Sbz * Rewrite destination address in case we will connect to loopback address. 2727185435Sbz * 2728188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2729188144Sjamie * Address passed in in NBO and returned in NBO. 2730185435Sbz */ 2731185435Sbzint 2732185435Sbzprison_remote_ip4(struct ucred *cred, struct in_addr *ia) 2733185435Sbz{ 2734191673Sjamie struct prison *pr; 2735185435Sbz 2736185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2737185435Sbz KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2738185435Sbz 2739192895Sjamie pr = cred->cr_prison; 2740192895Sjamie if (!(pr->pr_flags & PR_IP4)) 2741185435Sbz return (0); 2742191673Sjamie mtx_lock(&pr->pr_mtx); 2743192895Sjamie if (!(pr->pr_flags & PR_IP4)) { 2744192895Sjamie mtx_unlock(&pr->pr_mtx); 2745192895Sjamie return (0); 2746192895Sjamie } 2747191673Sjamie if (pr->pr_ip4 == NULL) { 2748191673Sjamie mtx_unlock(&pr->pr_mtx); 2749188144Sjamie return (EAFNOSUPPORT); 2750191673Sjamie } 2751188144Sjamie 2752185435Sbz if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 2753191673Sjamie ia->s_addr = pr->pr_ip4[0].s_addr; 2754191673Sjamie mtx_unlock(&pr->pr_mtx); 2755185435Sbz return (0); 2756185435Sbz } 2757185435Sbz 2758185435Sbz /* 2759185435Sbz * Return success because nothing had to be changed. 2760185435Sbz */ 2761191673Sjamie mtx_unlock(&pr->pr_mtx); 2762185435Sbz return (0); 2763185435Sbz} 2764185435Sbz 2765185435Sbz/* 2766188144Sjamie * Check if given address belongs to the jail referenced by cred/prison. 2767185435Sbz * 2768192895Sjamie * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2769192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2770192895Sjamie * doesn't allow IPv4. Address passed in in NBO. 2771185435Sbz */ 2772185435Sbzstatic int 2773185435Sbz_prison_check_ip4(struct prison *pr, struct in_addr *ia) 2774185435Sbz{ 2775185435Sbz int i, a, z, d; 2776185435Sbz 2777185435Sbz /* 2778185435Sbz * Check the primary IP. 2779185435Sbz */ 2780185435Sbz if (pr->pr_ip4[0].s_addr == ia->s_addr) 2781188144Sjamie return (0); 2782185435Sbz 2783185435Sbz /* 2784185435Sbz * All the other IPs are sorted so we can do a binary search. 2785185435Sbz */ 2786185435Sbz a = 0; 2787185435Sbz z = pr->pr_ip4s - 2; 2788185435Sbz while (a <= z) { 2789185435Sbz i = (a + z) / 2; 2790185435Sbz d = qcmp_v4(&pr->pr_ip4[i+1], ia); 2791185435Sbz if (d > 0) 2792185435Sbz z = i - 1; 2793185435Sbz else if (d < 0) 2794185435Sbz a = i + 1; 279581114Srwatson else 2796188144Sjamie return (0); 2797185435Sbz } 2798188144Sjamie 2799188144Sjamie return (EADDRNOTAVAIL); 2800185435Sbz} 2801185435Sbz 2802185435Sbzint 2803185435Sbzprison_check_ip4(struct ucred *cred, struct in_addr *ia) 2804185435Sbz{ 2805191673Sjamie struct prison *pr; 2806191673Sjamie int error; 2807185435Sbz 2808185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2809185435Sbz KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2810185435Sbz 2811192895Sjamie pr = cred->cr_prison; 2812192895Sjamie if (!(pr->pr_flags & PR_IP4)) 2813188144Sjamie return (0); 2814191673Sjamie mtx_lock(&pr->pr_mtx); 2815192895Sjamie if (!(pr->pr_flags & PR_IP4)) { 2816192895Sjamie mtx_unlock(&pr->pr_mtx); 2817192895Sjamie return (0); 2818192895Sjamie } 2819191673Sjamie if (pr->pr_ip4 == NULL) { 2820191673Sjamie mtx_unlock(&pr->pr_mtx); 2821188144Sjamie return (EAFNOSUPPORT); 2822191673Sjamie } 2823185435Sbz 2824191673Sjamie error = _prison_check_ip4(pr, ia); 2825191673Sjamie mtx_unlock(&pr->pr_mtx); 2826191673Sjamie return (error); 2827185435Sbz} 2828185435Sbz#endif 2829185435Sbz 2830185435Sbz#ifdef INET6 2831192895Sjamiestatic int 2832192895Sjamieprison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) 2833192895Sjamie{ 2834192895Sjamie int ii, ij, used; 2835192895Sjamie struct prison *ppr; 2836192895Sjamie 2837192895Sjamie ppr = pr->pr_parent; 2838192895Sjamie if (!(pr->pr_flags & PR_IP6_USER)) { 2839192895Sjamie /* This has no user settings, so just copy the parent's list. */ 2840192895Sjamie if (pr->pr_ip6s < ppr->pr_ip6s) { 2841192895Sjamie /* 2842192895Sjamie * There's no room for the parent's list. Use the 2843192895Sjamie * new list buffer, which is assumed to be big enough 2844192895Sjamie * (if it was passed). If there's no buffer, try to 2845192895Sjamie * allocate one. 2846192895Sjamie */ 2847192895Sjamie used = 1; 2848192895Sjamie if (newip6 == NULL) { 2849192895Sjamie newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), 2850192895Sjamie M_PRISON, M_NOWAIT); 2851192895Sjamie if (newip6 != NULL) 2852192895Sjamie used = 0; 2853192895Sjamie } 2854192895Sjamie if (newip6 != NULL) { 2855192895Sjamie bcopy(ppr->pr_ip6, newip6, 2856192895Sjamie ppr->pr_ip6s * sizeof(*newip6)); 2857192895Sjamie free(pr->pr_ip6, M_PRISON); 2858192895Sjamie pr->pr_ip6 = newip6; 2859192895Sjamie pr->pr_ip6s = ppr->pr_ip6s; 2860192895Sjamie } 2861192895Sjamie return (used); 2862192895Sjamie } 2863192895Sjamie pr->pr_ip6s = ppr->pr_ip6s; 2864192895Sjamie if (pr->pr_ip6s > 0) 2865192895Sjamie bcopy(ppr->pr_ip6, pr->pr_ip6, 2866192895Sjamie pr->pr_ip6s * sizeof(*newip6)); 2867192895Sjamie else if (pr->pr_ip6 != NULL) { 2868192895Sjamie free(pr->pr_ip6, M_PRISON); 2869192895Sjamie pr->pr_ip6 = NULL; 2870192895Sjamie } 2871195974Sjamie } else if (pr->pr_ip6s > 0) { 2872192895Sjamie /* Remove addresses that aren't in the parent. */ 2873192895Sjamie for (ij = 0; ij < ppr->pr_ip6s; ij++) 2874192895Sjamie if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], 2875192895Sjamie &ppr->pr_ip6[ij])) 2876192895Sjamie break; 2877192895Sjamie if (ij < ppr->pr_ip6s) 2878192895Sjamie ii = 1; 2879192895Sjamie else { 2880192895Sjamie bcopy(pr->pr_ip6 + 1, pr->pr_ip6, 2881192895Sjamie --pr->pr_ip6s * sizeof(*pr->pr_ip6)); 2882192895Sjamie ii = 0; 2883192895Sjamie } 2884192895Sjamie for (ij = 1; ii < pr->pr_ip6s; ) { 2885192895Sjamie if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], 2886192895Sjamie &ppr->pr_ip6[0])) { 2887192895Sjamie ii++; 2888192895Sjamie continue; 2889192895Sjamie } 2890192895Sjamie switch (ij >= ppr->pr_ip4s ? -1 : 2891192895Sjamie qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { 2892192895Sjamie case -1: 2893192895Sjamie bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, 2894192895Sjamie (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); 2895192895Sjamie break; 2896192895Sjamie case 0: 2897192895Sjamie ii++; 2898192895Sjamie ij++; 2899192895Sjamie break; 2900192895Sjamie case 1: 2901192895Sjamie ij++; 2902192895Sjamie break; 2903192895Sjamie } 2904192895Sjamie } 2905192895Sjamie if (pr->pr_ip6s == 0) { 2906195870Sjamie pr->pr_flags |= PR_IP6_DISABLE; 2907192895Sjamie free(pr->pr_ip6, M_PRISON); 2908192895Sjamie pr->pr_ip6 = NULL; 2909192895Sjamie } 2910192895Sjamie } 2911192895Sjamie return 0; 2912192895Sjamie} 2913192895Sjamie 2914185435Sbz/* 2915185435Sbz * Pass back primary IPv6 address for this jail. 2916185435Sbz * 2917192895Sjamie * If not restricted return success but do not alter the address. Caller has 2918192895Sjamie * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 2919185435Sbz * 2920188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 2921185435Sbz */ 2922185435Sbzint 2923187684Sbzprison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 2924185435Sbz{ 2925191673Sjamie struct prison *pr; 2926185435Sbz 2927185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2928185435Sbz KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2929185435Sbz 2930192895Sjamie pr = cred->cr_prison; 2931192895Sjamie if (!(pr->pr_flags & PR_IP6)) 293281114Srwatson return (0); 2933191673Sjamie mtx_lock(&pr->pr_mtx); 2934192895Sjamie if (!(pr->pr_flags & PR_IP6)) { 2935192895Sjamie mtx_unlock(&pr->pr_mtx); 2936192895Sjamie return (0); 2937192895Sjamie } 2938191673Sjamie if (pr->pr_ip6 == NULL) { 2939191673Sjamie mtx_unlock(&pr->pr_mtx); 2940188144Sjamie return (EAFNOSUPPORT); 2941191673Sjamie } 2942188144Sjamie 2943191673Sjamie bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2944191673Sjamie mtx_unlock(&pr->pr_mtx); 2945185435Sbz return (0); 2946185435Sbz} 2947185435Sbz 2948185435Sbz/* 2949192895Sjamie * Return true if pr1 and pr2 have the same IPv6 address restrictions. 2950192895Sjamie */ 2951192895Sjamieint 2952192895Sjamieprison_equal_ip6(struct prison *pr1, struct prison *pr2) 2953192895Sjamie{ 2954192895Sjamie 2955192895Sjamie if (pr1 == pr2) 2956192895Sjamie return (1); 2957192895Sjamie 2958195945Sjamie while (pr1 != &prison0 && 2959195945Sjamie#ifdef VIMAGE 2960195945Sjamie !(pr1->pr_flags & PR_VNET) && 2961195945Sjamie#endif 2962195945Sjamie !(pr1->pr_flags & PR_IP6_USER)) 2963192895Sjamie pr1 = pr1->pr_parent; 2964195945Sjamie while (pr2 != &prison0 && 2965195945Sjamie#ifdef VIMAGE 2966195945Sjamie !(pr2->pr_flags & PR_VNET) && 2967195945Sjamie#endif 2968195945Sjamie !(pr2->pr_flags & PR_IP6_USER)) 2969192895Sjamie pr2 = pr2->pr_parent; 2970192895Sjamie return (pr1 == pr2); 2971192895Sjamie} 2972192895Sjamie 2973192895Sjamie/* 2974185435Sbz * Make sure our (source) address is set to something meaningful to this jail. 2975185435Sbz * 2976185435Sbz * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 2977185435Sbz * when needed while binding. 2978185435Sbz * 2979192895Sjamie * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 2980192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2981192895Sjamie * doesn't allow IPv6. 2982185435Sbz */ 2983185435Sbzint 2984185435Sbzprison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 2985185435Sbz{ 2986191673Sjamie struct prison *pr; 2987191673Sjamie int error; 2988185435Sbz 2989185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2990185435Sbz KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2991185435Sbz 2992192895Sjamie pr = cred->cr_prison; 2993192895Sjamie if (!(pr->pr_flags & PR_IP6)) 2994185435Sbz return (0); 2995191673Sjamie mtx_lock(&pr->pr_mtx); 2996192895Sjamie if (!(pr->pr_flags & PR_IP6)) { 2997192895Sjamie mtx_unlock(&pr->pr_mtx); 2998192895Sjamie return (0); 2999192895Sjamie } 3000191673Sjamie if (pr->pr_ip6 == NULL) { 3001191673Sjamie mtx_unlock(&pr->pr_mtx); 3002188144Sjamie return (EAFNOSUPPORT); 3003191673Sjamie } 3004188144Sjamie 3005185435Sbz if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3006191673Sjamie bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3007191673Sjamie mtx_unlock(&pr->pr_mtx); 3008185435Sbz return (0); 300981114Srwatson } 3010185435Sbz 3011188144Sjamie if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 3012188144Sjamie /* 3013188144Sjamie * In case there is only 1 IPv6 address, and v6only is true, 3014188144Sjamie * then bind directly. 3015188144Sjamie */ 3016191673Sjamie if (v6only != 0 && pr->pr_ip6s == 1) 3017191673Sjamie bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3018191673Sjamie mtx_unlock(&pr->pr_mtx); 3019185435Sbz return (0); 3020185435Sbz } 3021188144Sjamie 3022191673Sjamie error = _prison_check_ip6(pr, ia6); 3023191673Sjamie mtx_unlock(&pr->pr_mtx); 3024191673Sjamie return (error); 3025185435Sbz} 3026185435Sbz 3027185435Sbz/* 3028185435Sbz * Rewrite destination address in case we will connect to loopback address. 3029185435Sbz * 3030188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 3031185435Sbz */ 3032185435Sbzint 3033185435Sbzprison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 3034185435Sbz{ 3035191673Sjamie struct prison *pr; 3036185435Sbz 3037185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3038185435Sbz KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3039185435Sbz 3040192895Sjamie pr = cred->cr_prison; 3041192895Sjamie if (!(pr->pr_flags & PR_IP6)) 3042185435Sbz return (0); 3043191673Sjamie mtx_lock(&pr->pr_mtx); 3044192895Sjamie if (!(pr->pr_flags & PR_IP6)) { 3045192895Sjamie mtx_unlock(&pr->pr_mtx); 3046192895Sjamie return (0); 3047192895Sjamie } 3048191673Sjamie if (pr->pr_ip6 == NULL) { 3049191673Sjamie mtx_unlock(&pr->pr_mtx); 3050188144Sjamie return (EAFNOSUPPORT); 3051191673Sjamie } 3052188144Sjamie 3053185435Sbz if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3054191673Sjamie bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3055191673Sjamie mtx_unlock(&pr->pr_mtx); 3056185435Sbz return (0); 3057185435Sbz } 3058185435Sbz 3059185435Sbz /* 3060185435Sbz * Return success because nothing had to be changed. 3061185435Sbz */ 3062191673Sjamie mtx_unlock(&pr->pr_mtx); 306346155Sphk return (0); 306446155Sphk} 306546155Sphk 3066185435Sbz/* 3067188144Sjamie * Check if given address belongs to the jail referenced by cred/prison. 3068185435Sbz * 3069192895Sjamie * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3070192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3071192895Sjamie * doesn't allow IPv6. 3072185435Sbz */ 3073185435Sbzstatic int 3074185435Sbz_prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 307546155Sphk{ 3076185435Sbz int i, a, z, d; 307746155Sphk 3078185435Sbz /* 3079185435Sbz * Check the primary IP. 3080185435Sbz */ 3081185435Sbz if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3082188144Sjamie return (0); 3083185435Sbz 3084185435Sbz /* 3085185435Sbz * All the other IPs are sorted so we can do a binary search. 3086185435Sbz */ 3087185435Sbz a = 0; 3088185435Sbz z = pr->pr_ip6s - 2; 3089185435Sbz while (a <= z) { 3090185435Sbz i = (a + z) / 2; 3091185435Sbz d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3092185435Sbz if (d > 0) 3093185435Sbz z = i - 1; 3094185435Sbz else if (d < 0) 3095185435Sbz a = i + 1; 309646155Sphk else 3097188144Sjamie return (0); 309846155Sphk } 3099188144Sjamie 3100188144Sjamie return (EADDRNOTAVAIL); 310146155Sphk} 310246155Sphk 310346155Sphkint 3104185435Sbzprison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3105185435Sbz{ 3106191673Sjamie struct prison *pr; 3107191673Sjamie int error; 3108185435Sbz 3109185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3110185435Sbz KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3111185435Sbz 3112192895Sjamie pr = cred->cr_prison; 3113192895Sjamie if (!(pr->pr_flags & PR_IP6)) 3114188144Sjamie return (0); 3115191673Sjamie mtx_lock(&pr->pr_mtx); 3116192895Sjamie if (!(pr->pr_flags & PR_IP6)) { 3117192895Sjamie mtx_unlock(&pr->pr_mtx); 3118192895Sjamie return (0); 3119192895Sjamie } 3120191673Sjamie if (pr->pr_ip6 == NULL) { 3121191673Sjamie mtx_unlock(&pr->pr_mtx); 3122188144Sjamie return (EAFNOSUPPORT); 3123191673Sjamie } 3124185435Sbz 3125191673Sjamie error = _prison_check_ip6(pr, ia6); 3126191673Sjamie mtx_unlock(&pr->pr_mtx); 3127191673Sjamie return (error); 3128185435Sbz} 3129185435Sbz#endif 3130185435Sbz 3131185435Sbz/* 3132188146Sjamie * Check if a jail supports the given address family. 3133188146Sjamie * 3134188146Sjamie * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3135188146Sjamie * if not. 3136188146Sjamie */ 3137188146Sjamieint 3138188146Sjamieprison_check_af(struct ucred *cred, int af) 3139188146Sjamie{ 3140192895Sjamie struct prison *pr; 3141188146Sjamie int error; 3142188146Sjamie 3143188146Sjamie KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3144188146Sjamie 3145192895Sjamie pr = cred->cr_prison; 3146194923Sjamie#ifdef VIMAGE 3147194915Sjamie /* Prisons with their own network stack are not limited. */ 3148194915Sjamie if (pr->pr_flags & PR_VNET) 3149194915Sjamie return (0); 3150194923Sjamie#endif 3151194915Sjamie 3152188146Sjamie error = 0; 3153188146Sjamie switch (af) 3154188146Sjamie { 3155188146Sjamie#ifdef INET 3156188146Sjamie case AF_INET: 3157192895Sjamie if (pr->pr_flags & PR_IP4) 3158192895Sjamie { 3159192895Sjamie mtx_lock(&pr->pr_mtx); 3160192895Sjamie if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3161192895Sjamie error = EAFNOSUPPORT; 3162192895Sjamie mtx_unlock(&pr->pr_mtx); 3163192895Sjamie } 3164188146Sjamie break; 3165188146Sjamie#endif 3166188146Sjamie#ifdef INET6 3167188146Sjamie case AF_INET6: 3168192895Sjamie if (pr->pr_flags & PR_IP6) 3169192895Sjamie { 3170192895Sjamie mtx_lock(&pr->pr_mtx); 3171192895Sjamie if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3172192895Sjamie error = EAFNOSUPPORT; 3173192895Sjamie mtx_unlock(&pr->pr_mtx); 3174192895Sjamie } 3175188146Sjamie break; 3176188146Sjamie#endif 3177188146Sjamie case AF_LOCAL: 3178188146Sjamie case AF_ROUTE: 3179188146Sjamie break; 3180188146Sjamie default: 3181192895Sjamie if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3182188146Sjamie error = EAFNOSUPPORT; 3183188146Sjamie } 3184188146Sjamie return (error); 3185188146Sjamie} 3186188146Sjamie 3187188146Sjamie/* 3188185435Sbz * Check if given address belongs to the jail referenced by cred (wrapper to 3189185435Sbz * prison_check_ip[46]). 3190185435Sbz * 3191192895Sjamie * Returns 0 if jail doesn't restrict the address family or if address belongs 3192192895Sjamie * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3193192895Sjamie * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 3194185435Sbz */ 3195185435Sbzint 319672786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa) 319746155Sphk{ 3198185435Sbz#ifdef INET 3199114168Smike struct sockaddr_in *sai; 3200185435Sbz#endif 3201185435Sbz#ifdef INET6 3202185435Sbz struct sockaddr_in6 *sai6; 3203185435Sbz#endif 3204188144Sjamie int error; 320546155Sphk 3206185435Sbz KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3207185435Sbz KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3208185435Sbz 3209188144Sjamie error = 0; 3210188144Sjamie switch (sa->sa_family) 3211185435Sbz { 3212185435Sbz#ifdef INET 3213185435Sbz case AF_INET: 3214185435Sbz sai = (struct sockaddr_in *)sa; 3215188144Sjamie error = prison_check_ip4(cred, &sai->sin_addr); 3216185435Sbz break; 3217185435Sbz#endif 3218185435Sbz#ifdef INET6 3219185435Sbz case AF_INET6: 3220185435Sbz sai6 = (struct sockaddr_in6 *)sa; 3221188144Sjamie error = prison_check_ip6(cred, &sai6->sin6_addr); 3222185435Sbz break; 3223185435Sbz#endif 3224185435Sbz default: 3225192895Sjamie if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3226188144Sjamie error = EAFNOSUPPORT; 3227185435Sbz } 3228188144Sjamie return (error); 322946155Sphk} 323072786Srwatson 323172786Srwatson/* 323272786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 323372786Srwatson */ 323472786Srwatsonint 3235114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2) 323672786Srwatson{ 323772786Srwatson 3238192895Sjamie return ((cred1->cr_prison == cred2->cr_prison || 3239192895Sjamie prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3240192895Sjamie} 324172786Srwatson 3242192895Sjamie/* 3243192895Sjamie * Return 1 if p2 is a child of p1, otherwise 0. 3244192895Sjamie */ 3245192895Sjamieint 3246192895Sjamieprison_ischild(struct prison *pr1, struct prison *pr2) 3247192895Sjamie{ 3248192895Sjamie 3249192895Sjamie for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3250192895Sjamie if (pr1 == pr2) 3251192895Sjamie return (1); 325272786Srwatson return (0); 325372786Srwatson} 325472786Srwatson 325572786Srwatson/* 325672786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0. 325772786Srwatson */ 325872786Srwatsonint 3259114168Smikejailed(struct ucred *cred) 326072786Srwatson{ 326172786Srwatson 3262192895Sjamie return (cred->cr_prison != &prison0); 326372786Srwatson} 326491384Srobert 326591384Srobert/* 3266194090Sjamie * Return the correct hostname (domainname, et al) for the passed credential. 326791384Srobert */ 326891391Srobertvoid 3269114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size) 327091384Srobert{ 3271193066Sjamie struct prison *pr; 327291384Srobert 3273194090Sjamie /* 3274194090Sjamie * A NULL credential can be used to shortcut to the physical 3275194090Sjamie * system's hostname. 3276194090Sjamie */ 3277193066Sjamie pr = (cred != NULL) ? cred->cr_prison : &prison0; 3278193066Sjamie mtx_lock(&pr->pr_mtx); 3279194118Sjamie strlcpy(buf, pr->pr_hostname, size); 3280193066Sjamie mtx_unlock(&pr->pr_mtx); 328191384Srobert} 3282113275Smike 3283194090Sjamievoid 3284194090Sjamiegetcreddomainname(struct ucred *cred, char *buf, size_t size) 3285194090Sjamie{ 3286194090Sjamie 3287194090Sjamie mtx_lock(&cred->cr_prison->pr_mtx); 3288194118Sjamie strlcpy(buf, cred->cr_prison->pr_domainname, size); 3289194090Sjamie mtx_unlock(&cred->cr_prison->pr_mtx); 3290194090Sjamie} 3291194090Sjamie 3292194090Sjamievoid 3293194090Sjamiegetcredhostuuid(struct ucred *cred, char *buf, size_t size) 3294194090Sjamie{ 3295194090Sjamie 3296194090Sjamie mtx_lock(&cred->cr_prison->pr_mtx); 3297194118Sjamie strlcpy(buf, cred->cr_prison->pr_hostuuid, size); 3298194090Sjamie mtx_unlock(&cred->cr_prison->pr_mtx); 3299194090Sjamie} 3300194090Sjamie 3301194090Sjamievoid 3302194090Sjamiegetcredhostid(struct ucred *cred, unsigned long *hostid) 3303194090Sjamie{ 3304194090Sjamie 3305194090Sjamie mtx_lock(&cred->cr_prison->pr_mtx); 3306194090Sjamie *hostid = cred->cr_prison->pr_hostid; 3307194090Sjamie mtx_unlock(&cred->cr_prison->pr_mtx); 3308194090Sjamie} 3309194090Sjamie 3310125804Srwatson/* 3311147185Spjd * Determine whether the subject represented by cred can "see" 3312147185Spjd * status of a mount point. 3313147185Spjd * Returns: 0 for permitted, ENOENT otherwise. 3314147185Spjd * XXX: This function should be called cr_canseemount() and should be 3315147185Spjd * placed in kern_prot.c. 3316125804Srwatson */ 3317125804Srwatsonint 3318147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp) 3319125804Srwatson{ 3320147185Spjd struct prison *pr; 3321147185Spjd struct statfs *sp; 3322147185Spjd size_t len; 3323125804Srwatson 3324192895Sjamie pr = cred->cr_prison; 3325192895Sjamie if (pr->pr_enforce_statfs == 0) 3326147185Spjd return (0); 3327147185Spjd if (pr->pr_root->v_mount == mp) 3328147185Spjd return (0); 3329192895Sjamie if (pr->pr_enforce_statfs == 2) 3330147185Spjd return (ENOENT); 3331147185Spjd /* 3332147185Spjd * If jail's chroot directory is set to "/" we should be able to see 3333147185Spjd * all mount-points from inside a jail. 3334147185Spjd * This is ugly check, but this is the only situation when jail's 3335147185Spjd * directory ends with '/'. 3336147185Spjd */ 3337147185Spjd if (strcmp(pr->pr_path, "/") == 0) 3338147185Spjd return (0); 3339147185Spjd len = strlen(pr->pr_path); 3340147185Spjd sp = &mp->mnt_stat; 3341147185Spjd if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3342147185Spjd return (ENOENT); 3343147185Spjd /* 3344147185Spjd * Be sure that we don't have situation where jail's root directory 3345147185Spjd * is "/some/path" and mount point is "/some/pathpath". 3346147185Spjd */ 3347147185Spjd if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3348147185Spjd return (ENOENT); 3349147185Spjd return (0); 3350147185Spjd} 3351147185Spjd 3352147185Spjdvoid 3353147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3354147185Spjd{ 3355147185Spjd char jpath[MAXPATHLEN]; 3356147185Spjd struct prison *pr; 3357147185Spjd size_t len; 3358147185Spjd 3359192895Sjamie pr = cred->cr_prison; 3360192895Sjamie if (pr->pr_enforce_statfs == 0) 3361147185Spjd return; 3362147185Spjd if (prison_canseemount(cred, mp) != 0) { 3363147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3364147185Spjd strlcpy(sp->f_mntonname, "[restricted]", 3365147185Spjd sizeof(sp->f_mntonname)); 3366147185Spjd return; 3367125804Srwatson } 3368147185Spjd if (pr->pr_root->v_mount == mp) { 3369147185Spjd /* 3370147185Spjd * Clear current buffer data, so we are sure nothing from 3371147185Spjd * the valid path left there. 3372147185Spjd */ 3373147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3374147185Spjd *sp->f_mntonname = '/'; 3375147185Spjd return; 3376147185Spjd } 3377147185Spjd /* 3378147185Spjd * If jail's chroot directory is set to "/" we should be able to see 3379147185Spjd * all mount-points from inside a jail. 3380147185Spjd */ 3381147185Spjd if (strcmp(pr->pr_path, "/") == 0) 3382147185Spjd return; 3383147185Spjd len = strlen(pr->pr_path); 3384147185Spjd strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3385147185Spjd /* 3386147185Spjd * Clear current buffer data, so we are sure nothing from 3387147185Spjd * the valid path left there. 3388147185Spjd */ 3389147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3390147185Spjd if (*jpath == '\0') { 3391147185Spjd /* Should never happen. */ 3392147185Spjd *sp->f_mntonname = '/'; 3393147185Spjd } else { 3394147185Spjd strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3395147185Spjd } 3396125804Srwatson} 3397125804Srwatson 3398164032Srwatson/* 3399164032Srwatson * Check with permission for a specific privilege is granted within jail. We 3400164032Srwatson * have a specific list of accepted privileges; the rest are denied. 3401164032Srwatson */ 3402164032Srwatsonint 3403164032Srwatsonprison_priv_check(struct ucred *cred, int priv) 3404164032Srwatson{ 3405164032Srwatson 3406164032Srwatson if (!jailed(cred)) 3407164032Srwatson return (0); 3408164032Srwatson 3409194915Sjamie#ifdef VIMAGE 3410194915Sjamie /* 3411194915Sjamie * Privileges specific to prisons with a virtual network stack. 3412194915Sjamie * There might be a duplicate entry here in case the privilege 3413194915Sjamie * is only granted conditionally in the legacy jail case. 3414194915Sjamie */ 3415164032Srwatson switch (priv) { 3416194915Sjamie#ifdef notyet 3417194915Sjamie /* 3418194915Sjamie * NFS-specific privileges. 3419194915Sjamie */ 3420194915Sjamie case PRIV_NFS_DAEMON: 3421194915Sjamie case PRIV_NFS_LOCKD: 3422194915Sjamie#endif 3423194915Sjamie /* 3424194915Sjamie * Network stack privileges. 3425194915Sjamie */ 3426194915Sjamie case PRIV_NET_BRIDGE: 3427194915Sjamie case PRIV_NET_GRE: 3428194915Sjamie case PRIV_NET_BPF: 3429194915Sjamie case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. */ 3430194915Sjamie case PRIV_NET_ROUTE: 3431194915Sjamie case PRIV_NET_TAP: 3432194915Sjamie case PRIV_NET_SETIFMTU: 3433194915Sjamie case PRIV_NET_SETIFFLAGS: 3434194915Sjamie case PRIV_NET_SETIFCAP: 3435194915Sjamie case PRIV_NET_SETIFNAME : 3436194915Sjamie case PRIV_NET_SETIFMETRIC: 3437194915Sjamie case PRIV_NET_SETIFPHYS: 3438194915Sjamie case PRIV_NET_SETIFMAC: 3439194915Sjamie case PRIV_NET_ADDMULTI: 3440194915Sjamie case PRIV_NET_DELMULTI: 3441194915Sjamie case PRIV_NET_HWIOCTL: 3442194915Sjamie case PRIV_NET_SETLLADDR: 3443194915Sjamie case PRIV_NET_ADDIFGROUP: 3444194915Sjamie case PRIV_NET_DELIFGROUP: 3445194915Sjamie case PRIV_NET_IFCREATE: 3446194915Sjamie case PRIV_NET_IFDESTROY: 3447194915Sjamie case PRIV_NET_ADDIFADDR: 3448194915Sjamie case PRIV_NET_DELIFADDR: 3449194915Sjamie case PRIV_NET_LAGG: 3450194915Sjamie case PRIV_NET_GIF: 3451194915Sjamie case PRIV_NET_SETIFVNET: 3452164032Srwatson 3453164032Srwatson /* 3454194915Sjamie * 802.11-related privileges. 3455194915Sjamie */ 3456194915Sjamie case PRIV_NET80211_GETKEY: 3457194915Sjamie#ifdef notyet 3458194915Sjamie case PRIV_NET80211_MANAGE: /* XXX-BZ discuss with sam@ */ 3459194915Sjamie#endif 3460194915Sjamie 3461194915Sjamie#ifdef notyet 3462194915Sjamie /* 3463194915Sjamie * AppleTalk privileges. 3464194915Sjamie */ 3465194915Sjamie case PRIV_NETATALK_RESERVEDPORT: 3466194915Sjamie 3467194915Sjamie /* 3468194915Sjamie * ATM privileges. 3469194915Sjamie */ 3470194915Sjamie case PRIV_NETATM_CFG: 3471194915Sjamie case PRIV_NETATM_ADD: 3472194915Sjamie case PRIV_NETATM_DEL: 3473194915Sjamie case PRIV_NETATM_SET: 3474194915Sjamie 3475194915Sjamie /* 3476194915Sjamie * Bluetooth privileges. 3477194915Sjamie */ 3478194915Sjamie case PRIV_NETBLUETOOTH_RAW: 3479194915Sjamie#endif 3480194915Sjamie 3481194915Sjamie /* 3482194915Sjamie * Netgraph and netgraph module privileges. 3483194915Sjamie */ 3484194915Sjamie case PRIV_NETGRAPH_CONTROL: 3485194915Sjamie#ifdef notyet 3486194915Sjamie case PRIV_NETGRAPH_TTY: 3487194915Sjamie#endif 3488194915Sjamie 3489194915Sjamie /* 3490194915Sjamie * IPv4 and IPv6 privileges. 3491194915Sjamie */ 3492194915Sjamie case PRIV_NETINET_IPFW: 3493194915Sjamie case PRIV_NETINET_DIVERT: 3494194915Sjamie case PRIV_NETINET_PF: 3495194915Sjamie case PRIV_NETINET_DUMMYNET: 3496194915Sjamie case PRIV_NETINET_CARP: 3497194915Sjamie case PRIV_NETINET_MROUTE: 3498194915Sjamie case PRIV_NETINET_RAW: 3499194915Sjamie case PRIV_NETINET_ADDRCTRL6: 3500194915Sjamie case PRIV_NETINET_ND6: 3501194915Sjamie case PRIV_NETINET_SCOPE6: 3502194915Sjamie case PRIV_NETINET_ALIFETIME6: 3503194915Sjamie case PRIV_NETINET_IPSEC: 3504194915Sjamie case PRIV_NETINET_BINDANY: 3505194915Sjamie 3506194915Sjamie#ifdef notyet 3507194915Sjamie /* 3508194915Sjamie * IPX/SPX privileges. 3509194915Sjamie */ 3510194915Sjamie case PRIV_NETIPX_RESERVEDPORT: 3511194915Sjamie case PRIV_NETIPX_RAW: 3512194915Sjamie 3513194915Sjamie /* 3514194915Sjamie * NCP privileges. 3515194915Sjamie */ 3516194915Sjamie case PRIV_NETNCP: 3517194915Sjamie 3518194915Sjamie /* 3519194915Sjamie * SMB privileges. 3520194915Sjamie */ 3521194915Sjamie case PRIV_NETSMB: 3522194915Sjamie#endif 3523194915Sjamie 3524194915Sjamie /* 3525194915Sjamie * No default: or deny here. 3526194915Sjamie * In case of no permit fall through to next switch(). 3527194915Sjamie */ 3528194915Sjamie if (cred->cr_prison->pr_flags & PR_VNET) 3529194915Sjamie return (0); 3530194915Sjamie } 3531194915Sjamie#endif /* VIMAGE */ 3532194915Sjamie 3533194915Sjamie switch (priv) { 3534194915Sjamie 3535194915Sjamie /* 3536164032Srwatson * Allow ktrace privileges for root in jail. 3537164032Srwatson */ 3538164032Srwatson case PRIV_KTRACE: 3539164032Srwatson 3540166827Srwatson#if 0 3541164032Srwatson /* 3542164032Srwatson * Allow jailed processes to configure audit identity and 3543164032Srwatson * submit audit records (login, etc). In the future we may 3544164032Srwatson * want to further refine the relationship between audit and 3545164032Srwatson * jail. 3546164032Srwatson */ 3547164032Srwatson case PRIV_AUDIT_GETAUDIT: 3548164032Srwatson case PRIV_AUDIT_SETAUDIT: 3549164032Srwatson case PRIV_AUDIT_SUBMIT: 3550166827Srwatson#endif 3551164032Srwatson 3552164032Srwatson /* 3553164032Srwatson * Allow jailed processes to manipulate process UNIX 3554164032Srwatson * credentials in any way they see fit. 3555164032Srwatson */ 3556164032Srwatson case PRIV_CRED_SETUID: 3557164032Srwatson case PRIV_CRED_SETEUID: 3558164032Srwatson case PRIV_CRED_SETGID: 3559164032Srwatson case PRIV_CRED_SETEGID: 3560164032Srwatson case PRIV_CRED_SETGROUPS: 3561164032Srwatson case PRIV_CRED_SETREUID: 3562164032Srwatson case PRIV_CRED_SETREGID: 3563164032Srwatson case PRIV_CRED_SETRESUID: 3564164032Srwatson case PRIV_CRED_SETRESGID: 3565164032Srwatson 3566164032Srwatson /* 3567164032Srwatson * Jail implements visibility constraints already, so allow 3568164032Srwatson * jailed root to override uid/gid-based constraints. 3569164032Srwatson */ 3570164032Srwatson case PRIV_SEEOTHERGIDS: 3571164032Srwatson case PRIV_SEEOTHERUIDS: 3572164032Srwatson 3573164032Srwatson /* 3574164032Srwatson * Jail implements inter-process debugging limits already, so 3575164032Srwatson * allow jailed root various debugging privileges. 3576164032Srwatson */ 3577164032Srwatson case PRIV_DEBUG_DIFFCRED: 3578164032Srwatson case PRIV_DEBUG_SUGID: 3579164032Srwatson case PRIV_DEBUG_UNPRIV: 3580164032Srwatson 3581164032Srwatson /* 3582164032Srwatson * Allow jail to set various resource limits and login 3583164032Srwatson * properties, and for now, exceed process resource limits. 3584164032Srwatson */ 3585164032Srwatson case PRIV_PROC_LIMIT: 3586164032Srwatson case PRIV_PROC_SETLOGIN: 3587164032Srwatson case PRIV_PROC_SETRLIMIT: 3588164032Srwatson 3589164032Srwatson /* 3590164032Srwatson * System V and POSIX IPC privileges are granted in jail. 3591164032Srwatson */ 3592164032Srwatson case PRIV_IPC_READ: 3593164032Srwatson case PRIV_IPC_WRITE: 3594164032Srwatson case PRIV_IPC_ADMIN: 3595164032Srwatson case PRIV_IPC_MSGSIZE: 3596164032Srwatson case PRIV_MQ_ADMIN: 3597164032Srwatson 3598164032Srwatson /* 3599192895Sjamie * Jail operations within a jail work on child jails. 3600192895Sjamie */ 3601192895Sjamie case PRIV_JAIL_ATTACH: 3602192895Sjamie case PRIV_JAIL_SET: 3603192895Sjamie case PRIV_JAIL_REMOVE: 3604192895Sjamie 3605192895Sjamie /* 3606164032Srwatson * Jail implements its own inter-process limits, so allow 3607164032Srwatson * root processes in jail to change scheduling on other 3608164032Srwatson * processes in the same jail. Likewise for signalling. 3609164032Srwatson */ 3610164032Srwatson case PRIV_SCHED_DIFFCRED: 3611185435Sbz case PRIV_SCHED_CPUSET: 3612164032Srwatson case PRIV_SIGNAL_DIFFCRED: 3613164032Srwatson case PRIV_SIGNAL_SUGID: 3614164032Srwatson 3615164032Srwatson /* 3616164032Srwatson * Allow jailed processes to write to sysctls marked as jail 3617164032Srwatson * writable. 3618164032Srwatson */ 3619164032Srwatson case PRIV_SYSCTL_WRITEJAIL: 3620164032Srwatson 3621164032Srwatson /* 3622164032Srwatson * Allow root in jail to manage a variety of quota 3623166831Srwatson * properties. These should likely be conditional on a 3624166831Srwatson * configuration option. 3625164032Srwatson */ 3626166832Srwatson case PRIV_VFS_GETQUOTA: 3627166832Srwatson case PRIV_VFS_SETQUOTA: 3628164032Srwatson 3629164032Srwatson /* 3630164032Srwatson * Since Jail relies on chroot() to implement file system 3631164032Srwatson * protections, grant many VFS privileges to root in jail. 3632164032Srwatson * Be careful to exclude mount-related and NFS-related 3633164032Srwatson * privileges. 3634164032Srwatson */ 3635164032Srwatson case PRIV_VFS_READ: 3636164032Srwatson case PRIV_VFS_WRITE: 3637164032Srwatson case PRIV_VFS_ADMIN: 3638164032Srwatson case PRIV_VFS_EXEC: 3639164032Srwatson case PRIV_VFS_LOOKUP: 3640164032Srwatson case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 3641164032Srwatson case PRIV_VFS_CHFLAGS_DEV: 3642164032Srwatson case PRIV_VFS_CHOWN: 3643164032Srwatson case PRIV_VFS_CHROOT: 3644167152Spjd case PRIV_VFS_RETAINSUGID: 3645164032Srwatson case PRIV_VFS_FCHROOT: 3646164032Srwatson case PRIV_VFS_LINK: 3647164032Srwatson case PRIV_VFS_SETGID: 3648172860Srwatson case PRIV_VFS_STAT: 3649164032Srwatson case PRIV_VFS_STICKYFILE: 3650164032Srwatson return (0); 3651164032Srwatson 3652164032Srwatson /* 3653164032Srwatson * Depending on the global setting, allow privilege of 3654164032Srwatson * setting system flags. 3655164032Srwatson */ 3656164032Srwatson case PRIV_VFS_SYSFLAGS: 3657192895Sjamie if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3658164032Srwatson return (0); 3659164032Srwatson else 3660164032Srwatson return (EPERM); 3661164032Srwatson 3662164032Srwatson /* 3663168396Spjd * Depending on the global setting, allow privilege of 3664168396Spjd * mounting/unmounting file systems. 3665168396Spjd */ 3666168396Spjd case PRIV_VFS_MOUNT: 3667168396Spjd case PRIV_VFS_UNMOUNT: 3668168396Spjd case PRIV_VFS_MOUNT_NONUSER: 3669168699Spjd case PRIV_VFS_MOUNT_OWNER: 3670192895Sjamie if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT) 3671168396Spjd return (0); 3672168396Spjd else 3673168396Spjd return (EPERM); 3674168396Spjd 3675168396Spjd /* 3676168591Srwatson * Allow jailed root to bind reserved ports and reuse in-use 3677168591Srwatson * ports. 3678164032Srwatson */ 3679164032Srwatson case PRIV_NETINET_RESERVEDPORT: 3680168591Srwatson case PRIV_NETINET_REUSEPORT: 3681164032Srwatson return (0); 3682164032Srwatson 3683164032Srwatson /* 3684175630Sbz * Allow jailed root to set certian IPv4/6 (option) headers. 3685175630Sbz */ 3686175630Sbz case PRIV_NETINET_SETHDROPTS: 3687175630Sbz return (0); 3688175630Sbz 3689175630Sbz /* 3690164032Srwatson * Conditionally allow creating raw sockets in jail. 3691164032Srwatson */ 3692164032Srwatson case PRIV_NETINET_RAW: 3693192895Sjamie if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 3694164032Srwatson return (0); 3695164032Srwatson else 3696164032Srwatson return (EPERM); 3697164032Srwatson 3698164032Srwatson /* 3699164032Srwatson * Since jail implements its own visibility limits on netstat 3700164032Srwatson * sysctls, allow getcred. This allows identd to work in 3701164032Srwatson * jail. 3702164032Srwatson */ 3703164032Srwatson case PRIV_NETINET_GETCRED: 3704164032Srwatson return (0); 3705164032Srwatson 3706164032Srwatson default: 3707164032Srwatson /* 3708164032Srwatson * In all remaining cases, deny the privilege request. This 3709164032Srwatson * includes almost all network privileges, many system 3710164032Srwatson * configuration privileges. 3711164032Srwatson */ 3712164032Srwatson return (EPERM); 3713164032Srwatson } 3714164032Srwatson} 3715164032Srwatson 3716192895Sjamie/* 3717192895Sjamie * Return the part of pr2's name that is relative to pr1, or the whole name 3718192895Sjamie * if it does not directly follow. 3719192895Sjamie */ 3720192895Sjamie 3721192895Sjamiechar * 3722192895Sjamieprison_name(struct prison *pr1, struct prison *pr2) 3723192895Sjamie{ 3724192895Sjamie char *name; 3725192895Sjamie 3726192895Sjamie /* Jails see themselves as "0" (if they see themselves at all). */ 3727192895Sjamie if (pr1 == pr2) 3728192895Sjamie return "0"; 3729192895Sjamie name = pr2->pr_name; 3730192895Sjamie if (prison_ischild(pr1, pr2)) { 3731192895Sjamie /* 3732192895Sjamie * pr1 isn't locked (and allprison_lock may not be either) 3733192895Sjamie * so its length can't be counted on. But the number of dots 3734192895Sjamie * can be counted on - and counted. 3735192895Sjamie */ 3736192895Sjamie for (; pr1 != &prison0; pr1 = pr1->pr_parent) 3737192895Sjamie name = strchr(name, '.') + 1; 3738192895Sjamie } 3739192895Sjamie return (name); 3740192895Sjamie} 3741192895Sjamie 3742192895Sjamie/* 3743192895Sjamie * Return the part of pr2's path that is relative to pr1, or the whole path 3744192895Sjamie * if it does not directly follow. 3745192895Sjamie */ 3746192895Sjamiestatic char * 3747192895Sjamieprison_path(struct prison *pr1, struct prison *pr2) 3748192895Sjamie{ 3749192895Sjamie char *path1, *path2; 3750192895Sjamie int len1; 3751192895Sjamie 3752192895Sjamie path1 = pr1->pr_path; 3753192895Sjamie path2 = pr2->pr_path; 3754192895Sjamie if (!strcmp(path1, "/")) 3755192895Sjamie return (path2); 3756192895Sjamie len1 = strlen(path1); 3757192895Sjamie if (strncmp(path1, path2, len1)) 3758192895Sjamie return (path2); 3759192895Sjamie if (path2[len1] == '\0') 3760192895Sjamie return "/"; 3761192895Sjamie if (path2[len1] == '/') 3762192895Sjamie return (path2 + len1); 3763192895Sjamie return (path2); 3764192895Sjamie} 3765192895Sjamie 3766192895Sjamie 3767192895Sjamie/* 3768192895Sjamie * Jail-related sysctls. 3769192895Sjamie */ 3770192895SjamieSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 3771192895Sjamie "Jails"); 3772192895Sjamie 3773113275Smikestatic int 3774113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS) 3775113275Smike{ 3776191673Sjamie struct xprison *xp; 3777192895Sjamie struct prison *pr, *cpr; 3778191673Sjamie#ifdef INET 3779191673Sjamie struct in_addr *ip4 = NULL; 3780191673Sjamie int ip4s = 0; 3781191673Sjamie#endif 3782191673Sjamie#ifdef INET6 3783191673Sjamie struct in_addr *ip6 = NULL; 3784191673Sjamie int ip6s = 0; 3785191673Sjamie#endif 3786192895Sjamie int descend, error; 3787113275Smike 3788191673Sjamie xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 3789192895Sjamie pr = req->td->td_ucred->cr_prison; 3790191673Sjamie error = 0; 3791168401Spjd sx_slock(&allprison_lock); 3792192895Sjamie FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 3793192895Sjamie#if defined(INET) || defined(INET6) 3794191673Sjamie again: 3795192895Sjamie#endif 3796192895Sjamie mtx_lock(&cpr->pr_mtx); 3797185435Sbz#ifdef INET 3798192895Sjamie if (cpr->pr_ip4s > 0) { 3799192895Sjamie if (ip4s < cpr->pr_ip4s) { 3800192895Sjamie ip4s = cpr->pr_ip4s; 3801192895Sjamie mtx_unlock(&cpr->pr_mtx); 3802191673Sjamie ip4 = realloc(ip4, ip4s * 3803191673Sjamie sizeof(struct in_addr), M_TEMP, M_WAITOK); 3804191673Sjamie goto again; 3805191673Sjamie } 3806192895Sjamie bcopy(cpr->pr_ip4, ip4, 3807192895Sjamie cpr->pr_ip4s * sizeof(struct in_addr)); 3808191673Sjamie } 3809185435Sbz#endif 3810185435Sbz#ifdef INET6 3811192895Sjamie if (cpr->pr_ip6s > 0) { 3812192895Sjamie if (ip6s < cpr->pr_ip6s) { 3813192895Sjamie ip6s = cpr->pr_ip6s; 3814192895Sjamie mtx_unlock(&cpr->pr_mtx); 3815191673Sjamie ip6 = realloc(ip6, ip6s * 3816191673Sjamie sizeof(struct in6_addr), M_TEMP, M_WAITOK); 3817191673Sjamie goto again; 3818191673Sjamie } 3819192895Sjamie bcopy(cpr->pr_ip6, ip6, 3820192895Sjamie cpr->pr_ip6s * sizeof(struct in6_addr)); 3821191673Sjamie } 3822185435Sbz#endif 3823192895Sjamie if (cpr->pr_ref == 0) { 3824192895Sjamie mtx_unlock(&cpr->pr_mtx); 3825191673Sjamie continue; 3826191673Sjamie } 3827191673Sjamie bzero(xp, sizeof(*xp)); 3828113275Smike xp->pr_version = XPRISON_VERSION; 3829192895Sjamie xp->pr_id = cpr->pr_id; 3830192895Sjamie xp->pr_state = cpr->pr_uref > 0 3831191673Sjamie ? PRISON_STATE_ALIVE : PRISON_STATE_DYING; 3832192895Sjamie strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); 3833194118Sjamie strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); 3834192895Sjamie strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); 3835185435Sbz#ifdef INET 3836192895Sjamie xp->pr_ip4s = cpr->pr_ip4s; 3837185435Sbz#endif 3838185435Sbz#ifdef INET6 3839192895Sjamie xp->pr_ip6s = cpr->pr_ip6s; 3840185435Sbz#endif 3841192895Sjamie mtx_unlock(&cpr->pr_mtx); 3842191673Sjamie error = SYSCTL_OUT(req, xp, sizeof(*xp)); 3843191673Sjamie if (error) 3844191673Sjamie break; 3845185435Sbz#ifdef INET 3846191673Sjamie if (xp->pr_ip4s > 0) { 3847191673Sjamie error = SYSCTL_OUT(req, ip4, 3848191673Sjamie xp->pr_ip4s * sizeof(struct in_addr)); 3849191673Sjamie if (error) 3850191673Sjamie break; 3851185435Sbz } 3852185435Sbz#endif 3853185435Sbz#ifdef INET6 3854191673Sjamie if (xp->pr_ip6s > 0) { 3855191673Sjamie error = SYSCTL_OUT(req, ip6, 3856191673Sjamie xp->pr_ip6s * sizeof(struct in6_addr)); 3857191673Sjamie if (error) 3858191673Sjamie break; 3859185435Sbz } 3860185435Sbz#endif 3861113275Smike } 3862168401Spjd sx_sunlock(&allprison_lock); 3863191673Sjamie free(xp, M_TEMP); 3864191673Sjamie#ifdef INET 3865191673Sjamie free(ip4, M_TEMP); 3866191673Sjamie#endif 3867191673Sjamie#ifdef INET6 3868191673Sjamie free(ip6, M_TEMP); 3869191673Sjamie#endif 3870167354Spjd return (error); 3871113275Smike} 3872113275Smike 3873187864SedSYSCTL_OID(_security_jail, OID_AUTO, list, 3874187864Sed CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3875187864Sed sysctl_jail_list, "S", "List of active jails"); 3876126004Spjd 3877126004Spjdstatic int 3878126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 3879126004Spjd{ 3880126004Spjd int error, injail; 3881126004Spjd 3882126004Spjd injail = jailed(req->td->td_ucred); 3883126004Spjd error = SYSCTL_OUT(req, &injail, sizeof(injail)); 3884126004Spjd 3885126004Spjd return (error); 3886126004Spjd} 3887192895Sjamie 3888187864SedSYSCTL_PROC(_security_jail, OID_AUTO, jailed, 3889187864Sed CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3890187864Sed sysctl_jail_jailed, "I", "Process in jail?"); 3891185435Sbz 3892192895Sjamie#if defined(INET) || defined(INET6) 3893193865SjamieSYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 3894192895Sjamie &jail_max_af_ips, 0, 3895192895Sjamie "Number of IP addresses a jail may have at most per address family"); 3896192895Sjamie#endif 3897192895Sjamie 3898192895Sjamie/* 3899192895Sjamie * Default parameters for jail(2) compatability. For historical reasons, 3900192895Sjamie * the sysctl names have varying similarity to the parameter names. Prisons 3901192895Sjamie * just see their own parameters, and can't change them. 3902192895Sjamie */ 3903192895Sjamiestatic int 3904192895Sjamiesysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 3905192895Sjamie{ 3906192895Sjamie struct prison *pr; 3907192895Sjamie int allow, error, i; 3908192895Sjamie 3909192895Sjamie pr = req->td->td_ucred->cr_prison; 3910192895Sjamie allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 3911192895Sjamie 3912192895Sjamie /* Get the current flag value, and convert it to a boolean. */ 3913192895Sjamie i = (allow & arg2) ? 1 : 0; 3914192895Sjamie if (arg1 != NULL) 3915192895Sjamie i = !i; 3916192895Sjamie error = sysctl_handle_int(oidp, &i, 0, req); 3917192895Sjamie if (error || !req->newptr) 3918192895Sjamie return (error); 3919192895Sjamie i = i ? arg2 : 0; 3920192895Sjamie if (arg1 != NULL) 3921192895Sjamie i ^= arg2; 3922192895Sjamie /* 3923192895Sjamie * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 3924192895Sjamie * for writing. 3925192895Sjamie */ 3926192895Sjamie mtx_lock(&prison0.pr_mtx); 3927192895Sjamie jail_default_allow = (jail_default_allow & ~arg2) | i; 3928192895Sjamie mtx_unlock(&prison0.pr_mtx); 3929192895Sjamie return (0); 3930192895Sjamie} 3931192895Sjamie 3932192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 3933192895Sjamie CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3934192895Sjamie NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 3935192895Sjamie "Processes in jail can set their hostnames"); 3936192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 3937192895Sjamie CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3938192895Sjamie (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 3939192895Sjamie "Processes in jail are limited to creating UNIX/IP/route sockets only"); 3940192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 3941192895Sjamie CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3942192895Sjamie NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 3943192895Sjamie "Processes in jail can use System V IPC primitives"); 3944192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 3945192895Sjamie CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3946192895Sjamie NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 3947192895Sjamie "Prison root can create raw sockets"); 3948192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 3949192895Sjamie CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3950192895Sjamie NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 3951192895Sjamie "Processes in jail can alter system file flags"); 3952192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 3953192895Sjamie CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3954192895Sjamie NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 3955192895Sjamie "Processes in jail can mount/unmount jail-friendly file systems"); 3956192895Sjamie 3957192895Sjamiestatic int 3958192895Sjamiesysctl_jail_default_level(SYSCTL_HANDLER_ARGS) 3959192895Sjamie{ 3960192895Sjamie struct prison *pr; 3961192895Sjamie int level, error; 3962192895Sjamie 3963192895Sjamie pr = req->td->td_ucred->cr_prison; 3964192895Sjamie level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2); 3965192895Sjamie error = sysctl_handle_int(oidp, &level, 0, req); 3966192895Sjamie if (error || !req->newptr) 3967192895Sjamie return (error); 3968192895Sjamie *(int *)arg1 = level; 3969192895Sjamie return (0); 3970192895Sjamie} 3971192895Sjamie 3972192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, 3973192895Sjamie CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3974192895Sjamie &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), 3975192895Sjamie sysctl_jail_default_level, "I", 3976192895Sjamie "Processes in jail cannot see all mounted file systems"); 3977192895Sjamie 3978192895Sjamie/* 3979192895Sjamie * Nodes to describe jail parameters. Maximum length of string parameters 3980192895Sjamie * is returned in the string itself, and the other parameters exist merely 3981192895Sjamie * to make themselves and their types known. 3982192895Sjamie */ 3983192895SjamieSYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, 3984192895Sjamie "Jail parameters"); 3985192895Sjamie 3986192895Sjamieint 3987192895Sjamiesysctl_jail_param(SYSCTL_HANDLER_ARGS) 3988192895Sjamie{ 3989192895Sjamie int i; 3990192895Sjamie long l; 3991192895Sjamie size_t s; 3992192895Sjamie char numbuf[12]; 3993192895Sjamie 3994192895Sjamie switch (oidp->oid_kind & CTLTYPE) 3995192895Sjamie { 3996192895Sjamie case CTLTYPE_LONG: 3997192895Sjamie case CTLTYPE_ULONG: 3998192895Sjamie l = 0; 3999192895Sjamie#ifdef SCTL_MASK32 4000192895Sjamie if (!(req->flags & SCTL_MASK32)) 4001192895Sjamie#endif 4002192895Sjamie return (SYSCTL_OUT(req, &l, sizeof(l))); 4003192895Sjamie case CTLTYPE_INT: 4004192895Sjamie case CTLTYPE_UINT: 4005192895Sjamie i = 0; 4006192895Sjamie return (SYSCTL_OUT(req, &i, sizeof(i))); 4007192895Sjamie case CTLTYPE_STRING: 4008192895Sjamie snprintf(numbuf, sizeof(numbuf), "%d", arg2); 4009192895Sjamie return 4010192895Sjamie (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); 4011192895Sjamie case CTLTYPE_STRUCT: 4012192895Sjamie s = (size_t)arg2; 4013192895Sjamie return (SYSCTL_OUT(req, &s, sizeof(s))); 4014192895Sjamie } 4015192895Sjamie return (0); 4016192895Sjamie} 4017192895Sjamie 4018192895SjamieSYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); 4019192895SjamieSYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); 4020192895SjamieSYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); 4021192895SjamieSYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); 4022192895SjamieSYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, 4023192895Sjamie "I", "Jail secure level"); 4024192895SjamieSYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, 4025192895Sjamie "I", "Jail cannot see all mounted file systems"); 4026192895SjamieSYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, 4027192895Sjamie "B", "Jail persistence"); 4028194251Sjamie#ifdef VIMAGE 4029194251SjamieSYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, 4030195870Sjamie "E,jailsys", "Virtual network stack"); 4031194251Sjamie#endif 4032192895SjamieSYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, 4033192895Sjamie "B", "Jail is in the process of shutting down"); 4034192895Sjamie 4035194762SjamieSYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); 4036194762SjamieSYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, 4037194762Sjamie "I", "Current number of child jails"); 4038194762SjamieSYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, 4039194762Sjamie "I", "Maximum number of child jails"); 4040194762Sjamie 4041195870SjamieSYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); 4042192895SjamieSYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, 4043192895Sjamie "Jail hostname"); 4044193066SjamieSYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, 4045193066Sjamie "Jail NIS domainname"); 4046193066SjamieSYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, 4047193066Sjamie "Jail host UUID"); 4048193066SjamieSYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, 4049193066Sjamie "LU", "Jail host ID"); 4050192895Sjamie 4051192895SjamieSYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 4052192895SjamieSYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); 4053192895Sjamie 4054192895Sjamie#ifdef INET 4055195974SjamieSYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, 4056195974Sjamie "Jail IPv4 address virtualization"); 4057192895SjamieSYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), 4058192895Sjamie "S,in_addr,a", "Jail IPv4 addresses"); 4059192895Sjamie#endif 4060192895Sjamie#ifdef INET6 4061195974SjamieSYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, 4062195974Sjamie "Jail IPv6 address virtualization"); 4063192895SjamieSYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), 4064192895Sjamie "S,in6_addr,a", "Jail IPv6 addresses"); 4065192895Sjamie#endif 4066192895Sjamie 4067192895SjamieSYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); 4068192895SjamieSYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, 4069192895Sjamie "B", "Jail may set hostname"); 4070192895SjamieSYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, 4071192895Sjamie "B", "Jail may use SYSV IPC"); 4072192895SjamieSYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, 4073192895Sjamie "B", "Jail may create raw sockets"); 4074192895SjamieSYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, 4075192895Sjamie "B", "Jail may alter system file flags"); 4076192895SjamieSYSCTL_JAIL_PARAM(_allow, mount, CTLTYPE_INT | CTLFLAG_RW, 4077192895Sjamie "B", "Jail may mount/unmount jail-friendly file systems"); 4078192895SjamieSYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, 4079192895Sjamie "B", "Jail may set file quotas"); 4080192895SjamieSYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, 4081192895Sjamie "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); 4082192895Sjamie 4083192895Sjamie 4084185435Sbz#ifdef DDB 4085191673Sjamie 4086191673Sjamiestatic void 4087191673Sjamiedb_show_prison(struct prison *pr) 4088185435Sbz{ 4089192895Sjamie int fi; 4090191673Sjamie#if defined(INET) || defined(INET6) 4091191673Sjamie int ii; 4092185435Sbz#endif 4093195870Sjamie unsigned jsf; 4094185435Sbz#ifdef INET6 4095185435Sbz char ip6buf[INET6_ADDRSTRLEN]; 4096185435Sbz#endif 4097185435Sbz 4098191673Sjamie db_printf("prison %p:\n", pr); 4099191673Sjamie db_printf(" jid = %d\n", pr->pr_id); 4100191673Sjamie db_printf(" name = %s\n", pr->pr_name); 4101192895Sjamie db_printf(" parent = %p\n", pr->pr_parent); 4102191673Sjamie db_printf(" ref = %d\n", pr->pr_ref); 4103191673Sjamie db_printf(" uref = %d\n", pr->pr_uref); 4104191673Sjamie db_printf(" path = %s\n", pr->pr_path); 4105191673Sjamie db_printf(" cpuset = %d\n", pr->pr_cpuset 4106191673Sjamie ? pr->pr_cpuset->cs_id : -1); 4107194251Sjamie#ifdef VIMAGE 4108194251Sjamie db_printf(" vnet = %p\n", pr->pr_vnet); 4109194251Sjamie#endif 4110191673Sjamie db_printf(" root = %p\n", pr->pr_root); 4111191673Sjamie db_printf(" securelevel = %d\n", pr->pr_securelevel); 4112194762Sjamie db_printf(" childcount = %d\n", pr->pr_childcount); 4113192895Sjamie db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); 4114192895Sjamie db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); 4115191673Sjamie db_printf(" flags = %x", pr->pr_flags); 4116192895Sjamie for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 4117192895Sjamie fi++) 4118192895Sjamie if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) 4119192895Sjamie db_printf(" %s", pr_flag_names[fi]); 4120195870Sjamie for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 4121195870Sjamie fi++) { 4122195870Sjamie jsf = pr->pr_flags & 4123195870Sjamie (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); 4124195870Sjamie db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name, 4125195870Sjamie pr_flag_jailsys[fi].disable && 4126195870Sjamie (jsf == pr_flag_jailsys[fi].disable) ? "disable" 4127195870Sjamie : (jsf == pr_flag_jailsys[fi].new) ? "new" 4128195870Sjamie : "inherit"); 4129195870Sjamie } 4130192895Sjamie db_printf(" allow = %x", pr->pr_allow); 4131192895Sjamie for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 4132192895Sjamie fi++) 4133192895Sjamie if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) 4134192895Sjamie db_printf(" %s", pr_allow_names[fi]); 4135191673Sjamie db_printf("\n"); 4136192895Sjamie db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); 4137194118Sjamie db_printf(" host.hostname = %s\n", pr->pr_hostname); 4138194118Sjamie db_printf(" host.domainname = %s\n", pr->pr_domainname); 4139194118Sjamie db_printf(" host.hostuuid = %s\n", pr->pr_hostuuid); 4140193066Sjamie db_printf(" host.hostid = %lu\n", pr->pr_hostid); 4141185435Sbz#ifdef INET 4142191673Sjamie db_printf(" ip4s = %d\n", pr->pr_ip4s); 4143191673Sjamie for (ii = 0; ii < pr->pr_ip4s; ii++) 4144191673Sjamie db_printf(" %s %s\n", 4145191673Sjamie ii == 0 ? "ip4 =" : " ", 4146191673Sjamie inet_ntoa(pr->pr_ip4[ii])); 4147185435Sbz#endif 4148185435Sbz#ifdef INET6 4149191673Sjamie db_printf(" ip6s = %d\n", pr->pr_ip6s); 4150191673Sjamie for (ii = 0; ii < pr->pr_ip6s; ii++) 4151191673Sjamie db_printf(" %s %s\n", 4152191673Sjamie ii == 0 ? "ip6 =" : " ", 4153191673Sjamie ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); 4154191673Sjamie#endif 4155191673Sjamie} 4156191673Sjamie 4157191673SjamieDB_SHOW_COMMAND(prison, db_show_prison_command) 4158191673Sjamie{ 4159191673Sjamie struct prison *pr; 4160191673Sjamie 4161191673Sjamie if (!have_addr) { 4162192895Sjamie /* 4163192895Sjamie * Show all prisons in the list, and prison0 which is not 4164192895Sjamie * listed. 4165192895Sjamie */ 4166192895Sjamie db_show_prison(&prison0); 4167192895Sjamie if (!db_pager_quit) { 4168192895Sjamie TAILQ_FOREACH(pr, &allprison, pr_list) { 4169192895Sjamie db_show_prison(pr); 4170192895Sjamie if (db_pager_quit) 4171192895Sjamie break; 4172192895Sjamie } 4173191673Sjamie } 4174191673Sjamie return; 4175191673Sjamie } 4176191673Sjamie 4177192895Sjamie if (addr == 0) 4178192895Sjamie pr = &prison0; 4179192895Sjamie else { 4180192895Sjamie /* Look for a prison with the ID and with references. */ 4181191673Sjamie TAILQ_FOREACH(pr, &allprison, pr_list) 4182192895Sjamie if (pr->pr_id == addr && pr->pr_ref > 0) 4183191673Sjamie break; 4184192895Sjamie if (pr == NULL) 4185192895Sjamie /* Look again, without requiring a reference. */ 4186192895Sjamie TAILQ_FOREACH(pr, &allprison, pr_list) 4187192895Sjamie if (pr->pr_id == addr) 4188192895Sjamie break; 4189192895Sjamie if (pr == NULL) 4190192895Sjamie /* Assume address points to a valid prison. */ 4191192895Sjamie pr = (struct prison *)addr; 4192192895Sjamie } 4193191673Sjamie db_show_prison(pr); 4194185435Sbz} 4195191673Sjamie 4196185435Sbz#endif /* DDB */ 4197