kern_jail.c revision 280632
1/*- 2 * Copyright (c) 1999 Poul-Henning Kamp. 3 * Copyright (c) 2008 Bjoern A. Zeeb. 4 * Copyright (c) 2009 James Gritton. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/10/sys/kern/kern_jail.c 280632 2015-03-25 20:57:54Z ian $"); 31 32#include "opt_compat.h" 33#include "opt_ddb.h" 34#include "opt_inet.h" 35#include "opt_inet6.h" 36 37#include <sys/param.h> 38#include <sys/types.h> 39#include <sys/kernel.h> 40#include <sys/systm.h> 41#include <sys/errno.h> 42#include <sys/sysproto.h> 43#include <sys/malloc.h> 44#include <sys/osd.h> 45#include <sys/priv.h> 46#include <sys/proc.h> 47#include <sys/taskqueue.h> 48#include <sys/fcntl.h> 49#include <sys/jail.h> 50#include <sys/lock.h> 51#include <sys/mutex.h> 52#include <sys/racct.h> 53#include <sys/refcount.h> 54#include <sys/sx.h> 55#include <sys/sysent.h> 56#include <sys/namei.h> 57#include <sys/mount.h> 58#include <sys/queue.h> 59#include <sys/socket.h> 60#include <sys/syscallsubr.h> 61#include <sys/sysctl.h> 62#include <sys/vnode.h> 63 64#include <net/if.h> 65#include <net/vnet.h> 66 67#include <netinet/in.h> 68 69#ifdef DDB 70#include <ddb/ddb.h> 71#ifdef INET6 72#include <netinet6/in6_var.h> 73#endif /* INET6 */ 74#endif /* DDB */ 75 76#include <security/mac/mac_framework.h> 77 78#define DEFAULT_HOSTUUID "00000000-0000-0000-0000-000000000000" 79 80MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 81static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures"); 82 83/* Keep struct prison prison0 and some code in kern_jail_set() readable. */ 84#ifdef INET 85#ifdef INET6 86#define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL 87#else 88#define _PR_IP_SADDRSEL PR_IP4_SADDRSEL 89#endif 90#else /* !INET */ 91#ifdef INET6 92#define _PR_IP_SADDRSEL PR_IP6_SADDRSEL 93#else 94#define _PR_IP_SADDRSEL 0 95#endif 96#endif 97 98/* prison0 describes what is "real" about the system. 
*/ 99struct prison prison0 = { 100 .pr_id = 0, 101 .pr_name = "0", 102 .pr_ref = 1, 103 .pr_uref = 1, 104 .pr_path = "/", 105 .pr_securelevel = -1, 106 .pr_devfs_rsnum = 0, 107 .pr_childmax = JAIL_MAX, 108 .pr_hostuuid = DEFAULT_HOSTUUID, 109 .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), 110#ifdef VIMAGE 111 .pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL, 112#else 113 .pr_flags = PR_HOST|_PR_IP_SADDRSEL, 114#endif 115 .pr_allow = PR_ALLOW_ALL, 116}; 117MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); 118 119/* allprison, allprison_racct and lastprid are protected by allprison_lock. */ 120struct sx allprison_lock; 121SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); 122struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); 123LIST_HEAD(, prison_racct) allprison_racct; 124int lastprid = 0; 125 126static int do_jail_attach(struct thread *td, struct prison *pr); 127static void prison_complete(void *context, int pending); 128static void prison_deref(struct prison *pr, int flags); 129static char *prison_path(struct prison *pr1, struct prison *pr2); 130static void prison_remove_one(struct prison *pr); 131#ifdef RACCT 132static void prison_racct_attach(struct prison *pr); 133static void prison_racct_modify(struct prison *pr); 134static void prison_racct_detach(struct prison *pr); 135#endif 136#ifdef INET 137static int _prison_check_ip4(struct prison *pr, struct in_addr *ia); 138static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); 139#endif 140#ifdef INET6 141static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); 142static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); 143#endif 144 145/* Flags for prison_deref */ 146#define PD_DEREF 0x01 147#define PD_DEUREF 0x02 148#define PD_LOCKED 0x04 149#define PD_LIST_SLOCKED 0x08 150#define PD_LIST_XLOCKED 0x10 151 152/* 153 * Parameter names corresponding to PR_* flag values. 
Size values are for kvm 154 * as we cannot figure out the size of a sparse array, or an array without a 155 * terminating entry. 156 */ 157static char *pr_flag_names[] = { 158 [0] = "persist", 159#ifdef INET 160 [7] = "ip4.saddrsel", 161#endif 162#ifdef INET6 163 [8] = "ip6.saddrsel", 164#endif 165}; 166const size_t pr_flag_names_size = sizeof(pr_flag_names); 167 168static char *pr_flag_nonames[] = { 169 [0] = "nopersist", 170#ifdef INET 171 [7] = "ip4.nosaddrsel", 172#endif 173#ifdef INET6 174 [8] = "ip6.nosaddrsel", 175#endif 176}; 177const size_t pr_flag_nonames_size = sizeof(pr_flag_nonames); 178 179struct jailsys_flags { 180 const char *name; 181 unsigned disable; 182 unsigned new; 183} pr_flag_jailsys[] = { 184 { "host", 0, PR_HOST }, 185#ifdef VIMAGE 186 { "vnet", 0, PR_VNET }, 187#endif 188#ifdef INET 189 { "ip4", PR_IP4_USER | PR_IP4_DISABLE, PR_IP4_USER }, 190#endif 191#ifdef INET6 192 { "ip6", PR_IP6_USER | PR_IP6_DISABLE, PR_IP6_USER }, 193#endif 194}; 195const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys); 196 197static char *pr_allow_names[] = { 198 "allow.set_hostname", 199 "allow.sysvipc", 200 "allow.raw_sockets", 201 "allow.chflags", 202 "allow.mount", 203 "allow.quotas", 204 "allow.socket_af", 205 "allow.mount.devfs", 206 "allow.mount.nullfs", 207 "allow.mount.zfs", 208 "allow.mount.procfs", 209 "allow.mount.tmpfs", 210 "allow.mount.fdescfs", 211}; 212const size_t pr_allow_names_size = sizeof(pr_allow_names); 213 214static char *pr_allow_nonames[] = { 215 "allow.noset_hostname", 216 "allow.nosysvipc", 217 "allow.noraw_sockets", 218 "allow.nochflags", 219 "allow.nomount", 220 "allow.noquotas", 221 "allow.nosocket_af", 222 "allow.mount.nodevfs", 223 "allow.mount.nonullfs", 224 "allow.mount.nozfs", 225 "allow.mount.noprocfs", 226 "allow.mount.notmpfs", 227 "allow.mount.nofdescfs", 228}; 229const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames); 230 231#define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME 232#define 
JAIL_DEFAULT_ENFORCE_STATFS 2 233#define JAIL_DEFAULT_DEVFS_RSNUM 0 234static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; 235static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 236static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; 237#if defined(INET) || defined(INET6) 238static unsigned jail_max_af_ips = 255; 239#endif 240 241/* 242 * Initialize the parts of prison0 that can't be static-initialized with 243 * constants. This is called from proc0_init() after creating thread0 cpuset. 244 */ 245void 246prison0_init(void) 247{ 248 249 prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset); 250 prison0.pr_osreldate = osreldate; 251 strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease)); 252} 253 254#ifdef INET 255static int 256qcmp_v4(const void *ip1, const void *ip2) 257{ 258 in_addr_t iaa, iab; 259 260 /* 261 * We need to compare in HBO here to get the list sorted as expected 262 * by the result of the code. Sorting NBO addresses gives you 263 * interesting results. If you do not understand, do not try. 264 */ 265 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 266 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 267 268 /* 269 * Do not simply return the difference of the two numbers, the int is 270 * not wide enough. 
271 */ 272 if (iaa > iab) 273 return (1); 274 else if (iaa < iab) 275 return (-1); 276 else 277 return (0); 278} 279#endif 280 281#ifdef INET6 282static int 283qcmp_v6(const void *ip1, const void *ip2) 284{ 285 const struct in6_addr *ia6a, *ia6b; 286 int i, rc; 287 288 ia6a = (const struct in6_addr *)ip1; 289 ia6b = (const struct in6_addr *)ip2; 290 291 rc = 0; 292 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 293 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 294 rc = 1; 295 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 296 rc = -1; 297 } 298 return (rc); 299} 300#endif 301 302/* 303 * struct jail_args { 304 * struct jail *jail; 305 * }; 306 */ 307int 308sys_jail(struct thread *td, struct jail_args *uap) 309{ 310 uint32_t version; 311 int error; 312 struct jail j; 313 314 error = copyin(uap->jail, &version, sizeof(uint32_t)); 315 if (error) 316 return (error); 317 318 switch (version) { 319 case 0: 320 { 321 struct jail_v0 j0; 322 323 /* FreeBSD single IPv4 jails. */ 324 bzero(&j, sizeof(struct jail)); 325 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 326 if (error) 327 return (error); 328 j.version = j0.version; 329 j.path = j0.path; 330 j.hostname = j0.hostname; 331 j.ip4s = htonl(j0.ip_number); /* jail_v0 is host order */ 332 break; 333 } 334 335 case 1: 336 /* 337 * Version 1 was used by multi-IPv4 jail implementations 338 * that never made it into the official kernel. 339 */ 340 return (EINVAL); 341 342 case 2: /* JAIL_API_VERSION */ 343 /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 344 error = copyin(uap->jail, &j, sizeof(struct jail)); 345 if (error) 346 return (error); 347 break; 348 349 default: 350 /* Sci-Fi jails are not supported, sorry. 
*/ 351 return (EINVAL); 352 } 353 return (kern_jail(td, &j)); 354} 355 356int 357kern_jail(struct thread *td, struct jail *j) 358{ 359 struct iovec optiov[2 * (4 360 + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) 361#ifdef INET 362 + 1 363#endif 364#ifdef INET6 365 + 1 366#endif 367 )]; 368 struct uio opt; 369 char *u_path, *u_hostname, *u_name; 370#ifdef INET 371 uint32_t ip4s; 372 struct in_addr *u_ip4; 373#endif 374#ifdef INET6 375 struct in6_addr *u_ip6; 376#endif 377 size_t tmplen; 378 int error, enforce_statfs, fi; 379 380 bzero(&optiov, sizeof(optiov)); 381 opt.uio_iov = optiov; 382 opt.uio_iovcnt = 0; 383 opt.uio_offset = -1; 384 opt.uio_resid = -1; 385 opt.uio_segflg = UIO_SYSSPACE; 386 opt.uio_rw = UIO_READ; 387 opt.uio_td = td; 388 389 /* Set permissions for top-level jails from sysctls. */ 390 if (!jailed(td->td_ucred)) { 391 for (fi = 0; fi < sizeof(pr_allow_names) / 392 sizeof(pr_allow_names[0]); fi++) { 393 optiov[opt.uio_iovcnt].iov_base = 394 (jail_default_allow & (1 << fi)) 395 ? pr_allow_names[fi] : pr_allow_nonames[fi]; 396 optiov[opt.uio_iovcnt].iov_len = 397 strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 398 opt.uio_iovcnt += 2; 399 } 400 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 401 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 402 opt.uio_iovcnt++; 403 enforce_statfs = jail_default_enforce_statfs; 404 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 405 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 406 opt.uio_iovcnt++; 407 } 408 409 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 410#ifdef INET 411 ip4s = (j->version == 0) ? 
1 : j->ip4s; 412 if (ip4s > jail_max_af_ips) 413 return (EINVAL); 414 tmplen += ip4s * sizeof(struct in_addr); 415#else 416 if (j->ip4s > 0) 417 return (EINVAL); 418#endif 419#ifdef INET6 420 if (j->ip6s > jail_max_af_ips) 421 return (EINVAL); 422 tmplen += j->ip6s * sizeof(struct in6_addr); 423#else 424 if (j->ip6s > 0) 425 return (EINVAL); 426#endif 427 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 428 u_hostname = u_path + MAXPATHLEN; 429 u_name = u_hostname + MAXHOSTNAMELEN; 430#ifdef INET 431 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 432#endif 433#ifdef INET6 434#ifdef INET 435 u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 436#else 437 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 438#endif 439#endif 440 optiov[opt.uio_iovcnt].iov_base = "path"; 441 optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 442 opt.uio_iovcnt++; 443 optiov[opt.uio_iovcnt].iov_base = u_path; 444 error = copyinstr(j->path, u_path, MAXPATHLEN, 445 &optiov[opt.uio_iovcnt].iov_len); 446 if (error) { 447 free(u_path, M_TEMP); 448 return (error); 449 } 450 opt.uio_iovcnt++; 451 optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 452 optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 453 opt.uio_iovcnt++; 454 optiov[opt.uio_iovcnt].iov_base = u_hostname; 455 error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 456 &optiov[opt.uio_iovcnt].iov_len); 457 if (error) { 458 free(u_path, M_TEMP); 459 return (error); 460 } 461 opt.uio_iovcnt++; 462 if (j->jailname != NULL) { 463 optiov[opt.uio_iovcnt].iov_base = "name"; 464 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 465 opt.uio_iovcnt++; 466 optiov[opt.uio_iovcnt].iov_base = u_name; 467 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 468 &optiov[opt.uio_iovcnt].iov_len); 469 if (error) { 470 free(u_path, M_TEMP); 471 return (error); 472 } 473 opt.uio_iovcnt++; 474 } 475#ifdef INET 476 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 477 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 478 opt.uio_iovcnt++; 479 
optiov[opt.uio_iovcnt].iov_base = u_ip4; 480 optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 481 if (j->version == 0) 482 u_ip4->s_addr = j->ip4s; 483 else { 484 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 485 if (error) { 486 free(u_path, M_TEMP); 487 return (error); 488 } 489 } 490 opt.uio_iovcnt++; 491#endif 492#ifdef INET6 493 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 494 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 495 opt.uio_iovcnt++; 496 optiov[opt.uio_iovcnt].iov_base = u_ip6; 497 optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 498 error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 499 if (error) { 500 free(u_path, M_TEMP); 501 return (error); 502 } 503 opt.uio_iovcnt++; 504#endif 505 KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 506 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 507 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 508 free(u_path, M_TEMP); 509 return (error); 510} 511 512 513/* 514 * struct jail_set_args { 515 * struct iovec *iovp; 516 * unsigned int iovcnt; 517 * int flags; 518 * }; 519 */ 520int 521sys_jail_set(struct thread *td, struct jail_set_args *uap) 522{ 523 struct uio *auio; 524 int error; 525 526 /* Check that we have an even number of iovecs. 
*/ 527 if (uap->iovcnt & 1) 528 return (EINVAL); 529 530 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 531 if (error) 532 return (error); 533 error = kern_jail_set(td, auio, uap->flags); 534 free(auio, M_IOV); 535 return (error); 536} 537 538int 539kern_jail_set(struct thread *td, struct uio *optuio, int flags) 540{ 541 struct nameidata nd; 542#ifdef INET 543 struct in_addr *ip4; 544#endif 545#ifdef INET6 546 struct in6_addr *ip6; 547#endif 548 struct vfsopt *opt; 549 struct vfsoptlist *opts; 550 struct prison *pr, *deadpr, *mypr, *ppr, *tpr; 551 struct vnode *root; 552 char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; 553 char *g_path, *osrelstr; 554#if defined(INET) || defined(INET6) 555 struct prison *tppr; 556 void *op; 557#endif 558 unsigned long hid; 559 size_t namelen, onamelen; 560 int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 561 int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; 562 int fi, jid, jsys, len, level; 563 int childmax, osreldt, rsnum, slevel; 564 int fullpath_disabled; 565#if defined(INET) || defined(INET6) 566 int ii, ij; 567#endif 568#ifdef INET 569 int ip4s, redo_ip4; 570#endif 571#ifdef INET6 572 int ip6s, redo_ip6; 573#endif 574 uint64_t pr_allow, ch_allow, pr_flags, ch_flags; 575 unsigned tallow; 576 char numbuf[12]; 577 578 error = priv_check(td, PRIV_JAIL_SET); 579 if (!error && (flags & JAIL_ATTACH)) 580 error = priv_check(td, PRIV_JAIL_ATTACH); 581 if (error) 582 return (error); 583 mypr = ppr = td->td_ucred->cr_prison; 584 if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) 585 return (EPERM); 586 if (flags & ~JAIL_SET_MASK) 587 return (EINVAL); 588 589 /* 590 * Check all the parameters before committing to anything. Not all 591 * errors can be caught early, but we may as well try. Also, this 592 * takes care of some expensive stuff (path lookup) before getting 593 * the allprison lock. 594 * 595 * XXX Jails are not filesystems, and jail parameters are not mount 596 * options. 
But it makes more sense to re-use the vfsopt code 597 * than duplicate it under a different name. 598 */ 599 error = vfs_buildopts(optuio, &opts); 600 if (error) 601 return (error); 602#ifdef INET 603 ip4 = NULL; 604#endif 605#ifdef INET6 606 ip6 = NULL; 607#endif 608 g_path = NULL; 609 610 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 611 if (error == ENOENT) 612 jid = 0; 613 else if (error != 0) 614 goto done_free; 615 616 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 617 if (error == ENOENT) 618 gotslevel = 0; 619 else if (error != 0) 620 goto done_free; 621 else 622 gotslevel = 1; 623 624 error = 625 vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); 626 if (error == ENOENT) 627 gotchildmax = 0; 628 else if (error != 0) 629 goto done_free; 630 else 631 gotchildmax = 1; 632 633 error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 634 if (error == ENOENT) 635 gotenforce = 0; 636 else if (error != 0) 637 goto done_free; 638 else if (enforce < 0 || enforce > 2) { 639 error = EINVAL; 640 goto done_free; 641 } else 642 gotenforce = 1; 643 644 error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); 645 if (error == ENOENT) 646 gotrsnum = 0; 647 else if (error != 0) 648 goto done_free; 649 else 650 gotrsnum = 1; 651 652 pr_flags = ch_flags = 0; 653 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 654 fi++) { 655 if (pr_flag_names[fi] == NULL) 656 continue; 657 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 658 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 659 } 660 ch_flags |= pr_flags; 661 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 662 fi++) { 663 error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys, 664 sizeof(jsys)); 665 if (error == ENOENT) 666 continue; 667 if (error != 0) 668 goto done_free; 669 switch (jsys) { 670 case JAIL_SYS_DISABLE: 671 if (!pr_flag_jailsys[fi].disable) { 672 error = EINVAL; 673 goto done_free; 
674 } 675 pr_flags |= pr_flag_jailsys[fi].disable; 676 break; 677 case JAIL_SYS_NEW: 678 pr_flags |= pr_flag_jailsys[fi].new; 679 break; 680 case JAIL_SYS_INHERIT: 681 break; 682 default: 683 error = EINVAL; 684 goto done_free; 685 } 686 ch_flags |= 687 pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable; 688 } 689 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 690 && !(pr_flags & PR_PERSIST)) { 691 error = EINVAL; 692 vfs_opterror(opts, "new jail must persist or attach"); 693 goto done_errmsg; 694 } 695#ifdef VIMAGE 696 if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { 697 error = EINVAL; 698 vfs_opterror(opts, "vnet cannot be changed after creation"); 699 goto done_errmsg; 700 } 701#endif 702#ifdef INET 703 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { 704 error = EINVAL; 705 vfs_opterror(opts, "ip4 cannot be changed after creation"); 706 goto done_errmsg; 707 } 708#endif 709#ifdef INET6 710 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { 711 error = EINVAL; 712 vfs_opterror(opts, "ip6 cannot be changed after creation"); 713 goto done_errmsg; 714 } 715#endif 716 717 pr_allow = ch_allow = 0; 718 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 719 fi++) { 720 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 721 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 722 } 723 ch_allow |= pr_allow; 724 725 error = vfs_getopt(opts, "name", (void **)&name, &len); 726 if (error == ENOENT) 727 name = NULL; 728 else if (error != 0) 729 goto done_free; 730 else { 731 if (len == 0 || name[len - 1] != '\0') { 732 error = EINVAL; 733 goto done_free; 734 } 735 if (len > MAXHOSTNAMELEN) { 736 error = ENAMETOOLONG; 737 goto done_free; 738 } 739 } 740 741 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 742 if (error == ENOENT) 743 host = NULL; 744 else if (error != 0) 745 goto done_free; 746 else { 747 ch_flags |= PR_HOST; 748 pr_flags |= PR_HOST; 749 if (len == 0 || host[len - 
1] != '\0') { 750 error = EINVAL; 751 goto done_free; 752 } 753 if (len > MAXHOSTNAMELEN) { 754 error = ENAMETOOLONG; 755 goto done_free; 756 } 757 } 758 759 error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 760 if (error == ENOENT) 761 domain = NULL; 762 else if (error != 0) 763 goto done_free; 764 else { 765 ch_flags |= PR_HOST; 766 pr_flags |= PR_HOST; 767 if (len == 0 || domain[len - 1] != '\0') { 768 error = EINVAL; 769 goto done_free; 770 } 771 if (len > MAXHOSTNAMELEN) { 772 error = ENAMETOOLONG; 773 goto done_free; 774 } 775 } 776 777 error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 778 if (error == ENOENT) 779 uuid = NULL; 780 else if (error != 0) 781 goto done_free; 782 else { 783 ch_flags |= PR_HOST; 784 pr_flags |= PR_HOST; 785 if (len == 0 || uuid[len - 1] != '\0') { 786 error = EINVAL; 787 goto done_free; 788 } 789 if (len > HOSTUUIDLEN) { 790 error = ENAMETOOLONG; 791 goto done_free; 792 } 793 } 794 795#ifdef COMPAT_FREEBSD32 796 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 797 uint32_t hid32; 798 799 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 800 hid = hid32; 801 } else 802#endif 803 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 804 if (error == ENOENT) 805 gothid = 0; 806 else if (error != 0) 807 goto done_free; 808 else { 809 gothid = 1; 810 ch_flags |= PR_HOST; 811 pr_flags |= PR_HOST; 812 } 813 814#ifdef INET 815 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 816 if (error == ENOENT) 817 ip4s = 0; 818 else if (error != 0) 819 goto done_free; 820 else if (ip4s & (sizeof(*ip4) - 1)) { 821 error = EINVAL; 822 goto done_free; 823 } else { 824 ch_flags |= PR_IP4_USER | PR_IP4_DISABLE; 825 if (ip4s == 0) 826 pr_flags |= PR_IP4_USER | PR_IP4_DISABLE; 827 else { 828 pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER; 829 ip4s /= sizeof(*ip4); 830 if (ip4s > jail_max_af_ips) { 831 error = EINVAL; 832 vfs_opterror(opts, "too many IPv4 addresses"); 833 goto done_errmsg; 834 } 835 
ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 836 bcopy(op, ip4, ip4s * sizeof(*ip4)); 837 /* 838 * IP addresses are all sorted but ip[0] to preserve 839 * the primary IP address as given from userland. 840 * This special IP is used for unbound outgoing 841 * connections as well for "loopback" traffic in case 842 * source address selection cannot find any more fitting 843 * address to connect from. 844 */ 845 if (ip4s > 1) 846 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 847 /* 848 * Check for duplicate addresses and do some simple 849 * zero and broadcast checks. If users give other bogus 850 * addresses it is their problem. 851 * 852 * We do not have to care about byte order for these 853 * checks so we will do them in NBO. 854 */ 855 for (ii = 0; ii < ip4s; ii++) { 856 if (ip4[ii].s_addr == INADDR_ANY || 857 ip4[ii].s_addr == INADDR_BROADCAST) { 858 error = EINVAL; 859 goto done_free; 860 } 861 if ((ii+1) < ip4s && 862 (ip4[0].s_addr == ip4[ii+1].s_addr || 863 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 864 error = EINVAL; 865 goto done_free; 866 } 867 } 868 } 869 } 870#endif 871 872#ifdef INET6 873 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 874 if (error == ENOENT) 875 ip6s = 0; 876 else if (error != 0) 877 goto done_free; 878 else if (ip6s & (sizeof(*ip6) - 1)) { 879 error = EINVAL; 880 goto done_free; 881 } else { 882 ch_flags |= PR_IP6_USER | PR_IP6_DISABLE; 883 if (ip6s == 0) 884 pr_flags |= PR_IP6_USER | PR_IP6_DISABLE; 885 else { 886 pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER; 887 ip6s /= sizeof(*ip6); 888 if (ip6s > jail_max_af_ips) { 889 error = EINVAL; 890 vfs_opterror(opts, "too many IPv6 addresses"); 891 goto done_errmsg; 892 } 893 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 894 bcopy(op, ip6, ip6s * sizeof(*ip6)); 895 if (ip6s > 1) 896 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 897 for (ii = 0; ii < ip6s; ii++) { 898 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 899 error = EINVAL; 900 goto done_free; 901 } 902 
if ((ii+1) < ip6s && 903 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 904 IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 905 { 906 error = EINVAL; 907 goto done_free; 908 } 909 } 910 } 911 } 912#endif 913 914#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 915 if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 916 error = EINVAL; 917 vfs_opterror(opts, 918 "vnet jails cannot have IP address restrictions"); 919 goto done_errmsg; 920 } 921#endif 922 923 fullpath_disabled = 0; 924 root = NULL; 925 error = vfs_getopt(opts, "path", (void **)&path, &len); 926 if (error == ENOENT) 927 path = NULL; 928 else if (error != 0) 929 goto done_free; 930 else { 931 if (flags & JAIL_UPDATE) { 932 error = EINVAL; 933 vfs_opterror(opts, 934 "path cannot be changed after creation"); 935 goto done_errmsg; 936 } 937 if (len == 0 || path[len - 1] != '\0') { 938 error = EINVAL; 939 goto done_free; 940 } 941 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, 942 path, td); 943 error = namei(&nd); 944 if (error) 945 goto done_free; 946 root = nd.ni_vp; 947 NDFREE(&nd, NDF_ONLY_PNBUF); 948 g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 949 strlcpy(g_path, path, MAXPATHLEN); 950 error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); 951 if (error == 0) 952 path = g_path; 953 else if (error == ENODEV) { 954 /* proceed if sysctl debug.disablefullpath == 1 */ 955 fullpath_disabled = 1; 956 if (len < 2 || (len == 2 && path[0] == '/')) 957 path = NULL; 958 } else { 959 /* exit on other errors */ 960 goto done_free; 961 } 962 if (root->v_type != VDIR) { 963 error = ENOTDIR; 964 vput(root); 965 goto done_free; 966 } 967 VOP_UNLOCK(root, 0); 968 if (fullpath_disabled) { 969 /* Leave room for a real-root full pathname. */ 970 if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 971 ? 
strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 972 error = ENAMETOOLONG; 973 goto done_free; 974 } 975 } 976 } 977 978 error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); 979 if (error == ENOENT) 980 osrelstr = NULL; 981 else if (error != 0) 982 goto done_free; 983 else { 984 if (flags & JAIL_UPDATE) { 985 error = EINVAL; 986 vfs_opterror(opts, 987 "osrelease cannot be changed after creation"); 988 goto done_errmsg; 989 } 990 if (len == 0 || len >= OSRELEASELEN) { 991 error = EINVAL; 992 vfs_opterror(opts, 993 "osrelease string must be 1-%d bytes long", 994 OSRELEASELEN - 1); 995 goto done_errmsg; 996 } 997 } 998 999 error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); 1000 if (error == ENOENT) 1001 osreldt = 0; 1002 else if (error != 0) 1003 goto done_free; 1004 else { 1005 if (flags & JAIL_UPDATE) { 1006 error = EINVAL; 1007 vfs_opterror(opts, 1008 "osreldate cannot be changed after creation"); 1009 goto done_errmsg; 1010 } 1011 if (osreldt == 0) { 1012 error = EINVAL; 1013 vfs_opterror(opts, "osreldate cannot be 0"); 1014 goto done_errmsg; 1015 } 1016 } 1017 1018 /* 1019 * Grab the allprison lock before letting modules check their 1020 * parameters. Once we have it, do not let go so we'll have a 1021 * consistent view of the OSD list. 1022 */ 1023 sx_xlock(&allprison_lock); 1024 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 1025 if (error) 1026 goto done_unlock_list; 1027 1028 /* By now, all parameters should have been noted. */ 1029 TAILQ_FOREACH(opt, opts, link) { 1030 if (!opt->seen && strcmp(opt->name, "errmsg")) { 1031 error = EINVAL; 1032 vfs_opterror(opts, "unknown parameter: %s", opt->name); 1033 goto done_unlock_list; 1034 } 1035 } 1036 1037 /* 1038 * See if we are creating a new record or updating an existing one. 1039 * This abuses the file error codes ENOENT and EEXIST. 
1040 */ 1041 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 1042 if (!cuflags) { 1043 error = EINVAL; 1044 vfs_opterror(opts, "no valid operation (create or update)"); 1045 goto done_unlock_list; 1046 } 1047 pr = NULL; 1048 namelc = NULL; 1049 if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { 1050 namelc = strrchr(name, '.'); 1051 jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); 1052 if (*p != '\0') 1053 jid = 0; 1054 } 1055 if (jid != 0) { 1056 /* 1057 * See if a requested jid already exists. There is an 1058 * information leak here if the jid exists but is not within 1059 * the caller's jail hierarchy. Jail creators will get EEXIST 1060 * even though they cannot see the jail, and CREATE | UPDATE 1061 * will return ENOENT which is not normally a valid error. 1062 */ 1063 if (jid < 0) { 1064 error = EINVAL; 1065 vfs_opterror(opts, "negative jid"); 1066 goto done_unlock_list; 1067 } 1068 pr = prison_find(jid); 1069 if (pr != NULL) { 1070 ppr = pr->pr_parent; 1071 /* Create: jid must not exist. */ 1072 if (cuflags == JAIL_CREATE) { 1073 mtx_unlock(&pr->pr_mtx); 1074 error = EEXIST; 1075 vfs_opterror(opts, "jail %d already exists", 1076 jid); 1077 goto done_unlock_list; 1078 } 1079 if (!prison_ischild(mypr, pr)) { 1080 mtx_unlock(&pr->pr_mtx); 1081 pr = NULL; 1082 } else if (pr->pr_uref == 0) { 1083 if (!(flags & JAIL_DYING)) { 1084 mtx_unlock(&pr->pr_mtx); 1085 error = ENOENT; 1086 vfs_opterror(opts, "jail %d is dying", 1087 jid); 1088 goto done_unlock_list; 1089 } else if ((flags & JAIL_ATTACH) || 1090 (pr_flags & PR_PERSIST)) { 1091 /* 1092 * A dying jail might be resurrected 1093 * (via attach or persist), but first 1094 * it must determine if another jail 1095 * has claimed its name. Accomplish 1096 * this by implicitly re-setting the 1097 * name. 1098 */ 1099 if (name == NULL) 1100 name = prison_name(mypr, pr); 1101 } 1102 } 1103 } 1104 if (pr == NULL) { 1105 /* Update: jid must exist. 
*/ 1106 if (cuflags == JAIL_UPDATE) { 1107 error = ENOENT; 1108 vfs_opterror(opts, "jail %d not found", jid); 1109 goto done_unlock_list; 1110 } 1111 } 1112 } 1113 /* 1114 * If the caller provided a name, look for a jail by that name. 1115 * This has different semantics for creates and updates keyed by jid 1116 * (where the name must not already exist in a different jail), 1117 * and updates keyed by the name itself (where the name must exist 1118 * because that is the jail being updated). 1119 */ 1120 if (name != NULL) { 1121 namelc = strrchr(name, '.'); 1122 if (namelc == NULL) 1123 namelc = name; 1124 else { 1125 /* 1126 * This is a hierarchical name. Split it into the 1127 * parent and child names, and make sure the parent 1128 * exists or matches an already found jail. 1129 */ 1130 *namelc = '\0'; 1131 if (pr != NULL) { 1132 if (strncmp(name, ppr->pr_name, namelc - name) 1133 || ppr->pr_name[namelc - name] != '\0') { 1134 mtx_unlock(&pr->pr_mtx); 1135 error = EINVAL; 1136 vfs_opterror(opts, 1137 "cannot change jail's parent"); 1138 goto done_unlock_list; 1139 } 1140 } else { 1141 ppr = prison_find_name(mypr, name); 1142 if (ppr == NULL) { 1143 error = ENOENT; 1144 vfs_opterror(opts, 1145 "jail \"%s\" not found", name); 1146 goto done_unlock_list; 1147 } 1148 mtx_unlock(&ppr->pr_mtx); 1149 } 1150 name = ++namelc; 1151 } 1152 if (name[0] != '\0') { 1153 namelen = 1154 (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 1155 name_again: 1156 deadpr = NULL; 1157 FOREACH_PRISON_CHILD(ppr, tpr) { 1158 if (tpr != pr && tpr->pr_ref > 0 && 1159 !strcmp(tpr->pr_name + namelen, name)) { 1160 if (pr == NULL && 1161 cuflags != JAIL_CREATE) { 1162 mtx_lock(&tpr->pr_mtx); 1163 if (tpr->pr_ref > 0) { 1164 /* 1165 * Use this jail 1166 * for updates. 
1167 */ 1168 if (tpr->pr_uref > 0) { 1169 pr = tpr; 1170 break; 1171 } 1172 deadpr = tpr; 1173 } 1174 mtx_unlock(&tpr->pr_mtx); 1175 } else if (tpr->pr_uref > 0) { 1176 /* 1177 * Create, or update(jid): 1178 * name must not exist in an 1179 * active sibling jail. 1180 */ 1181 error = EEXIST; 1182 if (pr != NULL) 1183 mtx_unlock(&pr->pr_mtx); 1184 vfs_opterror(opts, 1185 "jail \"%s\" already exists", 1186 name); 1187 goto done_unlock_list; 1188 } 1189 } 1190 } 1191 /* If no active jail is found, use a dying one. */ 1192 if (deadpr != NULL && pr == NULL) { 1193 if (flags & JAIL_DYING) { 1194 mtx_lock(&deadpr->pr_mtx); 1195 if (deadpr->pr_ref == 0) { 1196 mtx_unlock(&deadpr->pr_mtx); 1197 goto name_again; 1198 } 1199 pr = deadpr; 1200 } else if (cuflags == JAIL_UPDATE) { 1201 error = ENOENT; 1202 vfs_opterror(opts, 1203 "jail \"%s\" is dying", name); 1204 goto done_unlock_list; 1205 } 1206 } 1207 /* Update: name must exist if no jid. */ 1208 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1209 error = ENOENT; 1210 vfs_opterror(opts, "jail \"%s\" not found", 1211 name); 1212 goto done_unlock_list; 1213 } 1214 } 1215 } 1216 /* Update: must provide a jid or name. */ 1217 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1218 error = ENOENT; 1219 vfs_opterror(opts, "update specified no jail"); 1220 goto done_unlock_list; 1221 } 1222 1223 /* If there's no prison to update, create a new one and link it in. 
*/ 1224 if (pr == NULL) { 1225 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) 1226 if (tpr->pr_childcount >= tpr->pr_childmax) { 1227 error = EPERM; 1228 vfs_opterror(opts, "prison limit exceeded"); 1229 goto done_unlock_list; 1230 } 1231 created = 1; 1232 mtx_lock(&ppr->pr_mtx); 1233 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1234 mtx_unlock(&ppr->pr_mtx); 1235 error = ENOENT; 1236 vfs_opterror(opts, "parent jail went away!"); 1237 goto done_unlock_list; 1238 } 1239 ppr->pr_ref++; 1240 ppr->pr_uref++; 1241 mtx_unlock(&ppr->pr_mtx); 1242 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1243 if (jid == 0) { 1244 /* Find the next free jid. */ 1245 jid = lastprid + 1; 1246 findnext: 1247 if (jid == JAIL_MAX) 1248 jid = 1; 1249 TAILQ_FOREACH(tpr, &allprison, pr_list) { 1250 if (tpr->pr_id < jid) 1251 continue; 1252 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1253 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1254 break; 1255 } 1256 if (jid == lastprid) { 1257 error = EAGAIN; 1258 vfs_opterror(opts, 1259 "no available jail IDs"); 1260 free(pr, M_PRISON); 1261 prison_deref(ppr, PD_DEREF | 1262 PD_DEUREF | PD_LIST_XLOCKED); 1263 goto done_releroot; 1264 } 1265 jid++; 1266 goto findnext; 1267 } 1268 lastprid = jid; 1269 } else { 1270 /* 1271 * The jail already has a jid (that did not yet exist), 1272 * so just find where to insert it. 1273 */ 1274 TAILQ_FOREACH(tpr, &allprison, pr_list) 1275 if (tpr->pr_id >= jid) { 1276 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1277 break; 1278 } 1279 } 1280 if (tpr == NULL) 1281 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1282 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1283 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1284 tpr->pr_childcount++; 1285 1286 pr->pr_parent = ppr; 1287 pr->pr_id = jid; 1288 1289 /* Set some default values, and inherit some from the parent. 
*/ 1290 if (name == NULL) 1291 name = ""; 1292 if (path == NULL) { 1293 path = "/"; 1294 root = mypr->pr_root; 1295 vref(root); 1296 } 1297 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); 1298 pr->pr_flags |= PR_HOST; 1299#if defined(INET) || defined(INET6) 1300#ifdef VIMAGE 1301 if (!(pr_flags & PR_VNET)) 1302#endif 1303 { 1304#ifdef INET 1305 if (!(ch_flags & PR_IP4_USER)) 1306 pr->pr_flags |= 1307 PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE; 1308 else if (!(pr_flags & PR_IP4_USER)) { 1309 pr->pr_flags |= ppr->pr_flags & PR_IP4; 1310 if (ppr->pr_ip4 != NULL) { 1311 pr->pr_ip4s = ppr->pr_ip4s; 1312 pr->pr_ip4 = malloc(pr->pr_ip4s * 1313 sizeof(struct in_addr), M_PRISON, 1314 M_WAITOK); 1315 bcopy(ppr->pr_ip4, pr->pr_ip4, 1316 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1317 } 1318 } 1319#endif 1320#ifdef INET6 1321 if (!(ch_flags & PR_IP6_USER)) 1322 pr->pr_flags |= 1323 PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE; 1324 else if (!(pr_flags & PR_IP6_USER)) { 1325 pr->pr_flags |= ppr->pr_flags & PR_IP6; 1326 if (ppr->pr_ip6 != NULL) { 1327 pr->pr_ip6s = ppr->pr_ip6s; 1328 pr->pr_ip6 = malloc(pr->pr_ip6s * 1329 sizeof(struct in6_addr), M_PRISON, 1330 M_WAITOK); 1331 bcopy(ppr->pr_ip6, pr->pr_ip6, 1332 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1333 } 1334 } 1335#endif 1336 } 1337#endif 1338 /* Source address selection is always on by default. */ 1339 pr->pr_flags |= _PR_IP_SADDRSEL; 1340 1341 pr->pr_securelevel = ppr->pr_securelevel; 1342 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1343 pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 1344 pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; 1345 1346 pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate; 1347 if (osrelstr == NULL) 1348 strcpy(pr->pr_osrelease, ppr->pr_osrelease); 1349 else 1350 strcpy(pr->pr_osrelease, osrelstr); 1351 1352 LIST_INIT(&pr->pr_children); 1353 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1354 1355#ifdef VIMAGE 1356 /* Allocate a new vnet if specified. 
*/ 1357 pr->pr_vnet = (pr_flags & PR_VNET) 1358 ? vnet_alloc() : ppr->pr_vnet; 1359#endif 1360 /* 1361 * Allocate a dedicated cpuset for each jail. 1362 * Unlike other initial settings, this may return an erorr. 1363 */ 1364 error = cpuset_create_root(ppr, &pr->pr_cpuset); 1365 if (error) { 1366 prison_deref(pr, PD_LIST_XLOCKED); 1367 goto done_releroot; 1368 } 1369 1370 mtx_lock(&pr->pr_mtx); 1371 /* 1372 * New prisons do not yet have a reference, because we do not 1373 * want other to see the incomplete prison once the 1374 * allprison_lock is downgraded. 1375 */ 1376 } else { 1377 created = 0; 1378 /* 1379 * Grab a reference for existing prisons, to ensure they 1380 * continue to exist for the duration of the call. 1381 */ 1382 pr->pr_ref++; 1383#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 1384 if ((pr->pr_flags & PR_VNET) && 1385 (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 1386 error = EINVAL; 1387 vfs_opterror(opts, 1388 "vnet jails cannot have IP address restrictions"); 1389 goto done_deref_locked; 1390 } 1391#endif 1392#ifdef INET 1393 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1394 error = EINVAL; 1395 vfs_opterror(opts, 1396 "ip4 cannot be changed after creation"); 1397 goto done_deref_locked; 1398 } 1399#endif 1400#ifdef INET6 1401 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1402 error = EINVAL; 1403 vfs_opterror(opts, 1404 "ip6 cannot be changed after creation"); 1405 goto done_deref_locked; 1406 } 1407#endif 1408 } 1409 1410 /* Do final error checking before setting anything. 
*/ 1411 if (gotslevel) { 1412 if (slevel < ppr->pr_securelevel) { 1413 error = EPERM; 1414 goto done_deref_locked; 1415 } 1416 } 1417 if (gotchildmax) { 1418 if (childmax >= ppr->pr_childmax) { 1419 error = EPERM; 1420 goto done_deref_locked; 1421 } 1422 } 1423 if (gotenforce) { 1424 if (enforce < ppr->pr_enforce_statfs) { 1425 error = EPERM; 1426 goto done_deref_locked; 1427 } 1428 } 1429 if (gotrsnum) { 1430 /* 1431 * devfs_rsnum is a uint16_t 1432 */ 1433 if (rsnum < 0 || rsnum > 65535) { 1434 error = EINVAL; 1435 goto done_deref_locked; 1436 } 1437 /* 1438 * Nested jails always inherit parent's devfs ruleset 1439 */ 1440 if (jailed(td->td_ucred)) { 1441 if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { 1442 error = EPERM; 1443 goto done_deref_locked; 1444 } else 1445 rsnum = ppr->pr_devfs_rsnum; 1446 } 1447 } 1448#ifdef INET 1449 if (ip4s > 0) { 1450 if (ppr->pr_flags & PR_IP4) { 1451 /* 1452 * Make sure the new set of IP addresses is a 1453 * subset of the parent's list. Don't worry 1454 * about the parent being unlocked, as any 1455 * setting is done with allprison_lock held. 1456 */ 1457 for (ij = 0; ij < ppr->pr_ip4s; ij++) 1458 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 1459 break; 1460 if (ij == ppr->pr_ip4s) { 1461 error = EPERM; 1462 goto done_deref_locked; 1463 } 1464 if (ip4s > 1) { 1465 for (ii = ij = 1; ii < ip4s; ii++) { 1466 if (ip4[ii].s_addr == 1467 ppr->pr_ip4[0].s_addr) 1468 continue; 1469 for (; ij < ppr->pr_ip4s; ij++) 1470 if (ip4[ii].s_addr == 1471 ppr->pr_ip4[ij].s_addr) 1472 break; 1473 if (ij == ppr->pr_ip4s) 1474 break; 1475 } 1476 if (ij == ppr->pr_ip4s) { 1477 error = EPERM; 1478 goto done_deref_locked; 1479 } 1480 } 1481 } 1482 /* 1483 * Check for conflicting IP addresses. We permit them 1484 * if there is no more than one IP on each jail. If 1485 * there is a duplicate on a jail with more than one 1486 * IP stop checking and return error. 
1487 */ 1488 tppr = ppr; 1489#ifdef VIMAGE 1490 for (; tppr != &prison0; tppr = tppr->pr_parent) 1491 if (tppr->pr_flags & PR_VNET) 1492 break; 1493#endif 1494 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1495 if (tpr == pr || 1496#ifdef VIMAGE 1497 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1498#endif 1499 tpr->pr_uref == 0) { 1500 descend = 0; 1501 continue; 1502 } 1503 if (!(tpr->pr_flags & PR_IP4_USER)) 1504 continue; 1505 descend = 0; 1506 if (tpr->pr_ip4 == NULL || 1507 (ip4s == 1 && tpr->pr_ip4s == 1)) 1508 continue; 1509 for (ii = 0; ii < ip4s; ii++) { 1510 if (_prison_check_ip4(tpr, &ip4[ii]) == 0) { 1511 error = EADDRINUSE; 1512 vfs_opterror(opts, 1513 "IPv4 addresses clash"); 1514 goto done_deref_locked; 1515 } 1516 } 1517 } 1518 } 1519#endif 1520#ifdef INET6 1521 if (ip6s > 0) { 1522 if (ppr->pr_flags & PR_IP6) { 1523 /* 1524 * Make sure the new set of IP addresses is a 1525 * subset of the parent's list. 1526 */ 1527 for (ij = 0; ij < ppr->pr_ip6s; ij++) 1528 if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1529 &ppr->pr_ip6[ij])) 1530 break; 1531 if (ij == ppr->pr_ip6s) { 1532 error = EPERM; 1533 goto done_deref_locked; 1534 } 1535 if (ip6s > 1) { 1536 for (ii = ij = 1; ii < ip6s; ii++) { 1537 if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1538 &ppr->pr_ip6[0])) 1539 continue; 1540 for (; ij < ppr->pr_ip6s; ij++) 1541 if (IN6_ARE_ADDR_EQUAL( 1542 &ip6[ii], &ppr->pr_ip6[ij])) 1543 break; 1544 if (ij == ppr->pr_ip6s) 1545 break; 1546 } 1547 if (ij == ppr->pr_ip6s) { 1548 error = EPERM; 1549 goto done_deref_locked; 1550 } 1551 } 1552 } 1553 /* Check for conflicting IP addresses. 
*/ 1554 tppr = ppr; 1555#ifdef VIMAGE 1556 for (; tppr != &prison0; tppr = tppr->pr_parent) 1557 if (tppr->pr_flags & PR_VNET) 1558 break; 1559#endif 1560 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1561 if (tpr == pr || 1562#ifdef VIMAGE 1563 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1564#endif 1565 tpr->pr_uref == 0) { 1566 descend = 0; 1567 continue; 1568 } 1569 if (!(tpr->pr_flags & PR_IP6_USER)) 1570 continue; 1571 descend = 0; 1572 if (tpr->pr_ip6 == NULL || 1573 (ip6s == 1 && tpr->pr_ip6s == 1)) 1574 continue; 1575 for (ii = 0; ii < ip6s; ii++) { 1576 if (_prison_check_ip6(tpr, &ip6[ii]) == 0) { 1577 error = EADDRINUSE; 1578 vfs_opterror(opts, 1579 "IPv6 addresses clash"); 1580 goto done_deref_locked; 1581 } 1582 } 1583 } 1584 } 1585#endif 1586 onamelen = namelen = 0; 1587 if (name != NULL) { 1588 /* Give a default name of the jid. */ 1589 if (name[0] == '\0') 1590 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1591 else if (*namelc == '0' || (strtoul(namelc, &p, 10) != jid && 1592 *p == '\0')) { 1593 error = EINVAL; 1594 vfs_opterror(opts, 1595 "name cannot be numeric (unless it is the jid)"); 1596 goto done_deref_locked; 1597 } 1598 /* 1599 * Make sure the name isn't too long for the prison or its 1600 * children. 1601 */ 1602 onamelen = strlen(pr->pr_name); 1603 namelen = strlen(name); 1604 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1605 error = ENAMETOOLONG; 1606 goto done_deref_locked; 1607 } 1608 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1609 if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1610 sizeof(pr->pr_name)) { 1611 error = ENAMETOOLONG; 1612 goto done_deref_locked; 1613 } 1614 } 1615 } 1616 if (pr_allow & ~ppr->pr_allow) { 1617 error = EPERM; 1618 goto done_deref_locked; 1619 } 1620 1621 /* Set the parameters of the prison. 
*/ 1622#ifdef INET 1623 redo_ip4 = 0; 1624 if (pr_flags & PR_IP4_USER) { 1625 pr->pr_flags |= PR_IP4; 1626 free(pr->pr_ip4, M_PRISON); 1627 pr->pr_ip4s = ip4s; 1628 pr->pr_ip4 = ip4; 1629 ip4 = NULL; 1630 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1631#ifdef VIMAGE 1632 if (tpr->pr_flags & PR_VNET) { 1633 descend = 0; 1634 continue; 1635 } 1636#endif 1637 if (prison_restrict_ip4(tpr, NULL)) { 1638 redo_ip4 = 1; 1639 descend = 0; 1640 } 1641 } 1642 } 1643#endif 1644#ifdef INET6 1645 redo_ip6 = 0; 1646 if (pr_flags & PR_IP6_USER) { 1647 pr->pr_flags |= PR_IP6; 1648 free(pr->pr_ip6, M_PRISON); 1649 pr->pr_ip6s = ip6s; 1650 pr->pr_ip6 = ip6; 1651 ip6 = NULL; 1652 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1653#ifdef VIMAGE 1654 if (tpr->pr_flags & PR_VNET) { 1655 descend = 0; 1656 continue; 1657 } 1658#endif 1659 if (prison_restrict_ip6(tpr, NULL)) { 1660 redo_ip6 = 1; 1661 descend = 0; 1662 } 1663 } 1664 } 1665#endif 1666 if (gotslevel) { 1667 pr->pr_securelevel = slevel; 1668 /* Set all child jails to be at least this level. */ 1669 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1670 if (tpr->pr_securelevel < slevel) 1671 tpr->pr_securelevel = slevel; 1672 } 1673 if (gotchildmax) { 1674 pr->pr_childmax = childmax; 1675 /* Set all child jails to under this limit. */ 1676 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) 1677 if (tpr->pr_childmax > childmax - level) 1678 tpr->pr_childmax = childmax > level 1679 ? childmax - level : 0; 1680 } 1681 if (gotenforce) { 1682 pr->pr_enforce_statfs = enforce; 1683 /* Pass this restriction on to the children. */ 1684 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1685 if (tpr->pr_enforce_statfs < enforce) 1686 tpr->pr_enforce_statfs = enforce; 1687 } 1688 if (gotrsnum) { 1689 pr->pr_devfs_rsnum = rsnum; 1690 /* Pass this restriction on to the children. 
*/ 1691 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1692 tpr->pr_devfs_rsnum = rsnum; 1693 } 1694 if (name != NULL) { 1695 if (ppr == &prison0) 1696 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1697 else 1698 snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1699 ppr->pr_name, name); 1700 /* Change this component of child names. */ 1701 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1702 bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1703 strlen(tpr->pr_name + onamelen) + 1); 1704 bcopy(pr->pr_name, tpr->pr_name, namelen); 1705 } 1706 } 1707 if (path != NULL) { 1708 /* Try to keep a real-rooted full pathname. */ 1709 if (fullpath_disabled && path[0] == '/' && 1710 strcmp(mypr->pr_path, "/")) 1711 snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1712 mypr->pr_path, path); 1713 else 1714 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1715 pr->pr_root = root; 1716 } 1717 if (PR_HOST & ch_flags & ~pr_flags) { 1718 if (pr->pr_flags & PR_HOST) { 1719 /* 1720 * Copy the parent's host info. As with pr_ip4 above, 1721 * the lack of a lock on the parent is not a problem; 1722 * it is always set with allprison_lock at least 1723 * shared, and is held exclusively here. 1724 */ 1725 strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, 1726 sizeof(pr->pr_hostname)); 1727 strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, 1728 sizeof(pr->pr_domainname)); 1729 strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, 1730 sizeof(pr->pr_hostuuid)); 1731 pr->pr_hostid = pr->pr_parent->pr_hostid; 1732 } 1733 } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1734 /* Set this prison, and any descendants without PR_HOST. 
*/ 1735 if (host != NULL) 1736 strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); 1737 if (domain != NULL) 1738 strlcpy(pr->pr_domainname, domain, 1739 sizeof(pr->pr_domainname)); 1740 if (uuid != NULL) 1741 strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); 1742 if (gothid) 1743 pr->pr_hostid = hid; 1744 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1745 if (tpr->pr_flags & PR_HOST) 1746 descend = 0; 1747 else { 1748 if (host != NULL) 1749 strlcpy(tpr->pr_hostname, 1750 pr->pr_hostname, 1751 sizeof(tpr->pr_hostname)); 1752 if (domain != NULL) 1753 strlcpy(tpr->pr_domainname, 1754 pr->pr_domainname, 1755 sizeof(tpr->pr_domainname)); 1756 if (uuid != NULL) 1757 strlcpy(tpr->pr_hostuuid, 1758 pr->pr_hostuuid, 1759 sizeof(tpr->pr_hostuuid)); 1760 if (gothid) 1761 tpr->pr_hostid = hid; 1762 } 1763 } 1764 } 1765 if ((tallow = ch_allow & ~pr_allow)) { 1766 /* Clear allow bits in all children. */ 1767 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1768 tpr->pr_allow &= ~tallow; 1769 } 1770 pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1771 /* 1772 * Persistent prisons get an extra reference, and prisons losing their 1773 * persist flag lose that reference. Only do this for existing prisons 1774 * for now, so new ones will remain unseen until after the module 1775 * handlers have completed. 1776 */ 1777 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1778 if (pr_flags & PR_PERSIST) { 1779 pr->pr_ref++; 1780 pr->pr_uref++; 1781 } else { 1782 pr->pr_ref--; 1783 pr->pr_uref--; 1784 } 1785 } 1786 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1787 mtx_unlock(&pr->pr_mtx); 1788 1789#ifdef RACCT 1790 if (created) 1791 prison_racct_attach(pr); 1792#endif 1793 1794 /* Locks may have prevented a complete restriction of child IP 1795 * addresses. If so, allocate some more memory and try again. 
1796 */ 1797#ifdef INET 1798 while (redo_ip4) { 1799 ip4s = pr->pr_ip4s; 1800 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1801 mtx_lock(&pr->pr_mtx); 1802 redo_ip4 = 0; 1803 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1804#ifdef VIMAGE 1805 if (tpr->pr_flags & PR_VNET) { 1806 descend = 0; 1807 continue; 1808 } 1809#endif 1810 if (prison_restrict_ip4(tpr, ip4)) { 1811 if (ip4 != NULL) 1812 ip4 = NULL; 1813 else 1814 redo_ip4 = 1; 1815 } 1816 } 1817 mtx_unlock(&pr->pr_mtx); 1818 } 1819#endif 1820#ifdef INET6 1821 while (redo_ip6) { 1822 ip6s = pr->pr_ip6s; 1823 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1824 mtx_lock(&pr->pr_mtx); 1825 redo_ip6 = 0; 1826 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1827#ifdef VIMAGE 1828 if (tpr->pr_flags & PR_VNET) { 1829 descend = 0; 1830 continue; 1831 } 1832#endif 1833 if (prison_restrict_ip6(tpr, ip6)) { 1834 if (ip6 != NULL) 1835 ip6 = NULL; 1836 else 1837 redo_ip6 = 1; 1838 } 1839 } 1840 mtx_unlock(&pr->pr_mtx); 1841 } 1842#endif 1843 1844 /* Let the modules do their work. */ 1845 sx_downgrade(&allprison_lock); 1846 if (created) { 1847 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1848 if (error) { 1849 prison_deref(pr, PD_LIST_SLOCKED); 1850 goto done_errmsg; 1851 } 1852 } 1853 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1854 if (error) { 1855 prison_deref(pr, created 1856 ? PD_LIST_SLOCKED 1857 : PD_DEREF | PD_LIST_SLOCKED); 1858 goto done_errmsg; 1859 } 1860 1861 /* Attach this process to the prison if requested. 
*/ 1862 if (flags & JAIL_ATTACH) { 1863 mtx_lock(&pr->pr_mtx); 1864 error = do_jail_attach(td, pr); 1865 if (error) { 1866 vfs_opterror(opts, "attach failed"); 1867 if (!created) 1868 prison_deref(pr, PD_DEREF); 1869 goto done_errmsg; 1870 } 1871 } 1872 1873#ifdef RACCT 1874 if (!created) { 1875 if (!(flags & JAIL_ATTACH)) 1876 sx_sunlock(&allprison_lock); 1877 prison_racct_modify(pr); 1878 if (!(flags & JAIL_ATTACH)) 1879 sx_slock(&allprison_lock); 1880 } 1881#endif 1882 1883 td->td_retval[0] = pr->pr_id; 1884 1885 /* 1886 * Now that it is all there, drop the temporary reference from existing 1887 * prisons. Or add a reference to newly created persistent prisons 1888 * (which was not done earlier so that the prison would not be publicly 1889 * visible). 1890 */ 1891 if (!created) { 1892 prison_deref(pr, (flags & JAIL_ATTACH) 1893 ? PD_DEREF 1894 : PD_DEREF | PD_LIST_SLOCKED); 1895 } else { 1896 if (pr_flags & PR_PERSIST) { 1897 mtx_lock(&pr->pr_mtx); 1898 pr->pr_ref++; 1899 pr->pr_uref++; 1900 mtx_unlock(&pr->pr_mtx); 1901 } 1902 if (!(flags & JAIL_ATTACH)) 1903 sx_sunlock(&allprison_lock); 1904 } 1905 1906 goto done_errmsg; 1907 1908 done_deref_locked: 1909 prison_deref(pr, created 1910 ? 
PD_LOCKED | PD_LIST_XLOCKED 1911 : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1912 goto done_releroot; 1913 done_unlock_list: 1914 sx_xunlock(&allprison_lock); 1915 done_releroot: 1916 if (root != NULL) 1917 vrele(root); 1918 done_errmsg: 1919 if (error) { 1920 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1921 if (errmsg_len > 0) { 1922 errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1923 if (errmsg_pos > 0) { 1924 if (optuio->uio_segflg == UIO_SYSSPACE) 1925 bcopy(errmsg, 1926 optuio->uio_iov[errmsg_pos].iov_base, 1927 errmsg_len); 1928 else 1929 copyout(errmsg, 1930 optuio->uio_iov[errmsg_pos].iov_base, 1931 errmsg_len); 1932 } 1933 } 1934 } 1935 done_free: 1936#ifdef INET 1937 free(ip4, M_PRISON); 1938#endif 1939#ifdef INET6 1940 free(ip6, M_PRISON); 1941#endif 1942 if (g_path != NULL) 1943 free(g_path, M_TEMP); 1944 vfs_freeopts(opts); 1945 return (error); 1946} 1947 1948 1949/* 1950 * struct jail_get_args { 1951 * struct iovec *iovp; 1952 * unsigned int iovcnt; 1953 * int flags; 1954 * }; 1955 */ /* System call entry point for jail_get. Rejects an odd iovcnt with EINVAL, copies the caller's iovec array in with copyinuio(), passes the resulting uio to kern_jail_get(), and on success copies the iovec array back out to uap->iovp. The kernel copy is freed whether or not kern_jail_get() succeeded. Returns 0 or an errno value. */ 1956int 1957sys_jail_get(struct thread *td, struct jail_get_args *uap) 1958{ 1959 struct uio *auio; 1960 int error; 1961 1962 /* Check that we have an even number of iovecs. */ 1963 if (uap->iovcnt & 1) 1964 return (EINVAL); 1965 1966 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1967 if (error) 1968 return (error); 1969 error = kern_jail_get(td, auio, uap->flags); /* kern_jail_get() writes option lengths into iov_len of this kernel copy; on success the whole array is copied back to the caller. */ 1970 if (error == 0) 1971 error = copyout(auio->uio_iov, uap->iovp, 1972 uap->iovcnt * sizeof (struct iovec)); 1973 free(auio, M_IOV); 1974 return (error); 1975} 1976 /* Fetch the parameters of one jail into the option list described by optuio. The jail is selected by the first of the "lastjid", "jid" (non-zero) or "name" options that is present, looked up relative to the calling thread's prison. flags must lie within JAIL_GET_MASK, otherwise EINVAL is returned; without JAIL_DYING a jail whose pr_uref is 0 is skipped (lastjid) or reported as ENOENT (jid, name). Once a jail is found td->td_retval[0] is set to its ID. Returns 0 or an errno value; on error, if an "errmsg" option was supplied, the message text is copied to that option's buffer. */ 1977int 1978kern_jail_get(struct thread *td, struct uio *optuio, int flags) 1979{ 1980 struct prison *pr, *mypr; 1981 struct vfsopt *opt; 1982 struct vfsoptlist *opts; 1983 char *errmsg, *name; 1984 int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; 1985 1986 if (flags & ~JAIL_GET_MASK) 1987 return (EINVAL); 1988 1989 /* Get the parameter list. 
*/ 1990 error = vfs_buildopts(optuio, &opts); 1991 if (error) 1992 return (error); 1993 errmsg_pos = vfs_getopt_pos(opts, "errmsg"); /* Kept as the option index: the write-back loop skips this option and the error path uses it to locate the caller's buffer. */ 1994 mypr = td->td_ucred->cr_prison; 1995 1996 /* 1997 * Find the prison specified by one of: lastjid, jid, name. 1998 */ 1999 sx_slock(&allprison_lock); 2000 error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 2001 if (error == 0) { /* Walk allprison in list order and stop at the first referenced jail with an ID above lastjid that prison_ischild() accepts; the loop exits with that jail's pr_mtx held. */ 2002 TAILQ_FOREACH(pr, &allprison, pr_list) { 2003 if (pr->pr_id > jid && prison_ischild(mypr, pr)) { 2004 mtx_lock(&pr->pr_mtx); 2005 if (pr->pr_ref > 0 && 2006 (pr->pr_uref > 0 || (flags & JAIL_DYING))) 2007 break; 2008 mtx_unlock(&pr->pr_mtx); 2009 } 2010 } 2011 if (pr != NULL) 2012 goto found_prison; 2013 error = ENOENT; 2014 vfs_opterror(opts, "no jail after %d", jid); 2015 goto done_unlock_list; 2016 } else if (error != ENOENT) 2017 goto done_unlock_list; 2018 2019 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 2020 if (error == 0) { /* A jid of 0 selects nothing here and falls through to the name lookup. */ 2021 if (jid != 0) { 2022 pr = prison_find_child(mypr, jid); 2023 if (pr != NULL) { 2024 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 2025 mtx_unlock(&pr->pr_mtx); 2026 error = ENOENT; 2027 vfs_opterror(opts, "jail %d is dying", 2028 jid); 2029 goto done_unlock_list; 2030 } 2031 goto found_prison; 2032 } 2033 error = ENOENT; 2034 vfs_opterror(opts, "jail %d not found", jid); 2035 goto done_unlock_list; 2036 } 2037 } else if (error != ENOENT) 2038 goto done_unlock_list; 2039 2040 error = vfs_getopt(opts, "name", (void **)&name, &len); 2041 if (error == 0) { /* The name buffer must be non-empty and NUL-terminated. */ 2042 if (len == 0 || name[len - 1] != '\0') { 2043 error = EINVAL; 2044 goto done_unlock_list; 2045 } 2046 pr = prison_find_name(mypr, name); 2047 if (pr != NULL) { 2048 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 2049 mtx_unlock(&pr->pr_mtx); 2050 error = ENOENT; 2051 vfs_opterror(opts, "jail \"%s\" is dying", 2052 name); 2053 goto done_unlock_list; 2054 } 2055 goto found_prison; 2056 } 2057 error = ENOENT; 2058 vfs_opterror(opts, "jail \"%s\" not found", name); 2059 goto done_unlock_list; 2060 } 
else if (error != ENOENT) 2061 goto done_unlock_list; 2062 2063 vfs_opterror(opts, "no jail specified"); 2064 error = ENOENT; 2065 goto done_unlock_list; 2066 2067 found_prison: 2068 /* Get the parameters of the prison. */ /* Every path to this label arrives with pr_mtx held. Take a reference and record in locked that the mutex is to be handed to prison_deref() at done_deref. */ 2069 pr->pr_ref++; 2070 locked = PD_LOCKED; 2071 td->td_retval[0] = pr->pr_id; /* For each vfs_setopt*() call below ENOENT is not treated as a failure -- presumably it means the caller did not ask for that parameter (confirm against vfs_setopt()). */ 2072 error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 2073 if (error != 0 && error != ENOENT) 2074 goto done_deref; /* A jail directly below the caller's prison reports a parent of 0. */ 2075 i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id; 2076 error = vfs_setopt(opts, "parent", &i, sizeof(i)); 2077 if (error != 0 && error != ENOENT) 2078 goto done_deref; 2079 error = vfs_setopts(opts, "name", prison_name(mypr, pr)); 2080 if (error != 0 && error != ENOENT) 2081 goto done_deref; 2082 error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, 2083 sizeof(pr->pr_cpuset->cs_id)); 2084 if (error != 0 && error != ENOENT) 2085 goto done_deref; 2086 error = vfs_setopts(opts, "path", prison_path(mypr, pr)); 2087 if (error != 0 && error != ENOENT) 2088 goto done_deref; 2089#ifdef INET 2090 error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 2091 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2092 if (error != 0 && error != ENOENT) 2093 goto done_deref; 2094#endif 2095#ifdef INET6 2096 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 2097 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 2098 if (error != 0 && error != ENOENT) 2099 goto done_deref; 2100#endif 2101 error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 2102 sizeof(pr->pr_securelevel)); 2103 if (error != 0 && error != ENOENT) 2104 goto done_deref; 2105 error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, 2106 sizeof(pr->pr_childcount)); 2107 if (error != 0 && error != ENOENT) 2108 goto done_deref; 2109 error = vfs_setopt(opts, "children.max", &pr->pr_childmax, 2110 sizeof(pr->pr_childmax)); 2111 if (error != 0 && error != ENOENT) 2112 goto done_deref; 2113 error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); 2114 if (error != 0 && error != ENOENT) 2115 
goto done_deref; 2116 error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); 2117 if (error != 0 && error != ENOENT) 2118 goto done_deref; 2119 error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); 2120 if (error != 0 && error != ENOENT) 2121 goto done_deref; /* Processes flagged SV_ILP32 receive the host ID narrowed to a uint32_t. */ 2122#ifdef COMPAT_FREEBSD32 2123 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 2124 uint32_t hid32 = pr->pr_hostid; 2125 2126 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); 2127 } else 2128#endif 2129 error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, 2130 sizeof(pr->pr_hostid)); 2131 if (error != 0 && error != ENOENT) 2132 goto done_deref; 2133 error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, 2134 sizeof(pr->pr_enforce_statfs)); 2135 if (error != 0 && error != ENOENT) 2136 goto done_deref; 2137 error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum, 2138 sizeof(pr->pr_devfs_rsnum)); 2139 if (error != 0 && error != ENOENT) 2140 goto done_deref; /* Report every named flag bit twice: under its name and, inverted, under the matching pr_flag_nonames[] entry. */ 2141 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 2142 fi++) { 2143 if (pr_flag_names[fi] == NULL) 2144 continue; 2145 i = (pr->pr_flags & (1 << fi)) ? 1 : 0; 2146 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); 2147 if (error != 0 && error != ENOENT) 2148 goto done_deref; 2149 i = !i; 2150 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); 2151 if (error != 0 && error != ENOENT) 2152 goto done_deref; 2153 } /* Translate each jailsys flag pair into JAIL_SYS_DISABLE, JAIL_SYS_NEW or JAIL_SYS_INHERIT. */ 2154 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 2155 fi++) { 2156 i = pr->pr_flags & 2157 (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); 2158 i = pr_flag_jailsys[fi].disable && 2159 (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE 2160 : (i == pr_flag_jailsys[fi].new) ? 
JAIL_SYS_NEW 2161 : JAIL_SYS_INHERIT; 2162 error = 2163 vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i)); 2164 if (error != 0 && error != ENOENT) 2165 goto done_deref; 2166 } /* Same name/noname reporting for the pr_allow permission bits. */ 2167 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 2168 fi++) { 2169 if (pr_allow_names[fi] == NULL) 2170 continue; 2171 i = (pr->pr_allow & (1 << fi)) ? 1 : 0; 2172 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); 2173 if (error != 0 && error != ENOENT) 2174 goto done_deref; 2175 i = !i; 2176 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); 2177 if (error != 0 && error != ENOENT) 2178 goto done_deref; 2179 } /* "dying" is reported as true when the jail has no user references left. */ 2180 i = (pr->pr_uref == 0); 2181 error = vfs_setopt(opts, "dying", &i, sizeof(i)); 2182 if (error != 0 && error != ENOENT) 2183 goto done_deref; 2184 i = !i; 2185 error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 2186 if (error != 0 && error != ENOENT) 2187 goto done_deref; 2188 error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate, 2189 sizeof(pr->pr_osreldate)); 2190 if (error != 0 && error != ENOENT) 2191 goto done_deref; 2192 error = vfs_setopts(opts, "osrelease", pr->pr_osrelease); 2193 if (error != 0 && error != ENOENT) 2194 goto done_deref; 2195 2196 /* Get the module parameters. */ /* The PR_METHOD_GET handlers run without pr_mtx; locked is cleared so that done_deref no longer passes PD_LOCKED. */ 2197 mtx_unlock(&pr->pr_mtx); 2198 locked = 0; 2199 error = osd_jail_call(pr, PR_METHOD_GET, opts); 2200 if (error) 2201 goto done_deref; /* Drop the reference taken at found_prison. No sx_sunlock() follows on this path, so PD_LIST_SLOCKED presumably makes prison_deref() release allprison_lock -- confirm in prison_deref(). */ 2202 prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 2203 2204 /* By now, all parameters should have been noted. */ 2205 TAILQ_FOREACH(opt, opts, link) { 2206 if (!opt->seen && strcmp(opt->name, "errmsg")) { 2207 error = EINVAL; 2208 vfs_opterror(opts, "unknown parameter: %s", opt->name); 2209 goto done_errmsg; 2210 } 2211 } 2212 2213 /* Write the fetched parameters back to userspace. 
*/ 2214 error = 0; 2215 TAILQ_FOREACH(opt, opts, link) { 2216 if (opt->pos >= 0 && opt->pos != errmsg_pos) { /* Index of this option's value iovec: the odd slot of its pair (presumably the name sits at 2 * pos -- confirm against vfs_buildopts()). */ 2217 pos = 2 * opt->pos + 1; 2218 optuio->uio_iov[pos].iov_len = opt->len; 2219 if (opt->value != NULL) { 2220 if (optuio->uio_segflg == UIO_SYSSPACE) { 2221 bcopy(opt->value, 2222 optuio->uio_iov[pos].iov_base, 2223 opt->len); 2224 } else { 2225 error = copyout(opt->value, 2226 optuio->uio_iov[pos].iov_base, 2227 opt->len); 2228 if (error) 2229 break; 2230 } 2231 } 2232 } 2233 } 2234 goto done_errmsg; 2235 2236 done_deref: 2237 prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 2238 goto done_errmsg; 2239 2240 done_unlock_list: 2241 sx_sunlock(&allprison_lock); 2242 done_errmsg: /* Best-effort delivery of the error text to the caller's "errmsg" buffer; the copyout() result is ignored. */ 2243 if (error && errmsg_pos >= 0) { 2244 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 2245 errmsg_pos = 2 * errmsg_pos + 1; 2246 if (errmsg_len > 0) { 2247 if (optuio->uio_segflg == UIO_SYSSPACE) 2248 bcopy(errmsg, 2249 optuio->uio_iov[errmsg_pos].iov_base, 2250 errmsg_len); 2251 else 2252 copyout(errmsg, 2253 optuio->uio_iov[errmsg_pos].iov_base, 2254 errmsg_len); 2255 } 2256 } 2257 vfs_freeopts(opts); 2258 return (error); 2259} 2260 2261 2262/* 2263 * struct jail_remove_args { 2264 * int jid; 2265 * }; 2266 */ /* System call entry point for jail_remove. After a PRIV_JAIL_REMOVE privilege check, looks up uap->jid with prison_find_child() under an exclusive allprison_lock, marks the jail and each still-referenced descendant PR_REMOVE, and runs prison_remove_one() on the descendants and then on the jail itself. Returns the priv_check() error, EINVAL when the jail is not found, otherwise 0. */ 2267int 2268sys_jail_remove(struct thread *td, struct jail_remove_args *uap) 2269{ 2270 struct prison *pr, *cpr, *lpr, *tpr; 2271 int descend, error; 2272 2273 error = priv_check(td, PRIV_JAIL_REMOVE); 2274 if (error) 2275 return (error); 2276 2277 sx_xlock(&allprison_lock); 2278 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2279 if (pr == NULL) { 2280 sx_xunlock(&allprison_lock); 2281 return (EINVAL); 2282 } 2283 2284 /* Remove all descendants of this prison, then remove this prison. 
*/ /* Temporary reference on the jail being removed; prison_remove_one() drops it. */ 2285 pr->pr_ref++; 2286 pr->pr_flags |= PR_REMOVE; 2287 if (!LIST_EMPTY(&pr->pr_children)) { 2288 mtx_unlock(&pr->pr_mtx); 2289 lpr = NULL; /* lpr trails the iterator by one prison, so prison_remove_one() is only called on a prison the walk has already moved past. Each call is followed by sx_xlock() because prison_remove_one() gives up allprison_lock. */ 2290 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 2291 mtx_lock(&cpr->pr_mtx); 2292 if (cpr->pr_ref > 0) { 2293 tpr = cpr; 2294 cpr->pr_ref++; 2295 cpr->pr_flags |= PR_REMOVE; 2296 } else { 2297 /* Already removed - do not do it again. */ 2298 tpr = NULL; 2299 } 2300 mtx_unlock(&cpr->pr_mtx); 2301 if (lpr != NULL) { 2302 mtx_lock(&lpr->pr_mtx); 2303 prison_remove_one(lpr); 2304 sx_xlock(&allprison_lock); 2305 } 2306 lpr = tpr; 2307 } 2308 if (lpr != NULL) { 2309 mtx_lock(&lpr->pr_mtx); 2310 prison_remove_one(lpr); 2311 sx_xlock(&allprison_lock); 2312 } /* Re-take pr_mtx, released above, before the final prison_remove_one(). */ 2313 mtx_lock(&pr->pr_mtx); 2314 } 2315 prison_remove_one(pr); 2316 return (0); 2317} 2318 /* Remove a single prison. The caller holds pr_mtx and allprison_lock exclusively and has added a reference (checked by the KASSERT); both locks are either released below or handed to prison_deref() through PD_LOCKED | PD_LIST_XLOCKED. Clears PR_PERSIST together with its reference. If the caller's reference is the only one left the prison is dereferenced immediately; otherwise every process whose credential points at the prison is sent SIGKILL before the caller's reference is dropped. */ 2319static void 2320prison_remove_one(struct prison *pr) 2321{ 2322 struct proc *p; 2323 int deuref; 2324 2325 /* If the prison was persistent, it is not anymore. */ 2326 deuref = 0; /* pr_ref is decremented here; deuref carries PD_DEUREF into the later prison_deref() call, presumably to drop the matching user reference -- confirm in prison_deref(). */ 2327 if (pr->pr_flags & PR_PERSIST) { 2328 pr->pr_ref--; 2329 deuref = PD_DEUREF; 2330 pr->pr_flags &= ~PR_PERSIST; 2331 } 2332 2333 /* 2334 * jail_remove added a reference. If that's the only one, remove 2335 * the prison now. 2336 */ 2337 KASSERT(pr->pr_ref > 0, 2338 ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); 2339 if (pr->pr_ref == 1) { 2340 prison_deref(pr, 2341 deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 2342 return; 2343 } 2344 2345 mtx_unlock(&pr->pr_mtx); 2346 sx_xunlock(&allprison_lock); 2347 /* 2348 * Kill all processes unfortunate enough to be attached to this prison. 2349 */ 2350 sx_slock(&allproc_lock); 2351 LIST_FOREACH(p, &allproc, p_list) { 2352 PROC_LOCK(p); 2353 if (p->p_state != PRS_NEW && p->p_ucred && 2354 p->p_ucred->cr_prison == pr) 2355 kern_psignal(p, SIGKILL); 2356 PROC_UNLOCK(p); 2357 } 2358 sx_sunlock(&allproc_lock); 2359 /* Remove the temporary reference added by jail_remove. 
*/ 2360 prison_deref(pr, deuref | PD_DEREF); 2361} 2362 2363 2364/* 2365 * struct jail_attach_args { 2366 * int jid; 2367 * }; 2368 */ 2369int 2370sys_jail_attach(struct thread *td, struct jail_attach_args *uap) 2371{ 2372 struct prison *pr; 2373 int error; 2374 2375 error = priv_check(td, PRIV_JAIL_ATTACH); 2376 if (error) 2377 return (error); 2378 2379 sx_slock(&allprison_lock); 2380 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2381 if (pr == NULL) { 2382 sx_sunlock(&allprison_lock); 2383 return (EINVAL); 2384 } 2385 2386 /* 2387 * Do not allow a process to attach to a prison that is not 2388 * considered to be "alive". 2389 */ 2390 if (pr->pr_uref == 0) { 2391 mtx_unlock(&pr->pr_mtx); 2392 sx_sunlock(&allprison_lock); 2393 return (EINVAL); 2394 } 2395 2396 return (do_jail_attach(td, pr)); 2397} 2398 2399static int 2400do_jail_attach(struct thread *td, struct prison *pr) 2401{ 2402 struct prison *ppr; 2403 struct proc *p; 2404 struct ucred *newcred, *oldcred; 2405 int error; 2406 2407 /* 2408 * XXX: Note that there is a slight race here if two threads 2409 * in the same privileged process attempt to attach to two 2410 * different jails at the same time. It is important for 2411 * user processes not to do this, or they might end up with 2412 * a process root from one prison, but attached to the jail 2413 * of another. 2414 */ 2415 pr->pr_ref++; 2416 pr->pr_uref++; 2417 mtx_unlock(&pr->pr_mtx); 2418 2419 /* Let modules do whatever they need to prepare for attaching. */ 2420 error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 2421 if (error) { 2422 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 2423 return (error); 2424 } 2425 sx_sunlock(&allprison_lock); 2426 2427 /* 2428 * Reparent the newly attached process to this jail. 
2429 */ 2430 ppr = td->td_ucred->cr_prison; 2431 p = td->td_proc; 2432 error = cpuset_setproc_update_set(p, pr->pr_cpuset); 2433 if (error) 2434 goto e_revert_osd; 2435 2436 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 2437 if ((error = change_dir(pr->pr_root, td)) != 0) 2438 goto e_unlock; 2439#ifdef MAC 2440 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 2441 goto e_unlock; 2442#endif 2443 VOP_UNLOCK(pr->pr_root, 0); 2444 if ((error = change_root(pr->pr_root, td))) 2445 goto e_revert_osd; 2446 2447 newcred = crget(); 2448 PROC_LOCK(p); 2449 oldcred = p->p_ucred; 2450 setsugid(p); 2451 crcopy(newcred, oldcred); 2452 newcred->cr_prison = pr; 2453 p->p_ucred = newcred; 2454 PROC_UNLOCK(p); 2455#ifdef RACCT 2456 racct_proc_ucred_changed(p, oldcred, newcred); 2457#endif 2458 crfree(oldcred); 2459 prison_deref(ppr, PD_DEREF | PD_DEUREF); 2460 return (0); 2461 e_unlock: 2462 VOP_UNLOCK(pr->pr_root, 0); 2463 e_revert_osd: 2464 /* Tell modules this thread is still in its old jail after all. */ 2465 (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); 2466 prison_deref(pr, PD_DEREF | PD_DEUREF); 2467 return (error); 2468} 2469 2470 2471/* 2472 * Returns a locked prison instance, or NULL on failure. 2473 */ 2474struct prison * 2475prison_find(int prid) 2476{ 2477 struct prison *pr; 2478 2479 sx_assert(&allprison_lock, SX_LOCKED); 2480 TAILQ_FOREACH(pr, &allprison, pr_list) { 2481 if (pr->pr_id == prid) { 2482 mtx_lock(&pr->pr_mtx); 2483 if (pr->pr_ref > 0) 2484 return (pr); 2485 mtx_unlock(&pr->pr_mtx); 2486 } 2487 } 2488 return (NULL); 2489} 2490 2491/* 2492 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 
2493 */ 2494struct prison * 2495prison_find_child(struct prison *mypr, int prid) 2496{ 2497 struct prison *pr; 2498 int descend; 2499 2500 sx_assert(&allprison_lock, SX_LOCKED); 2501 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2502 if (pr->pr_id == prid) { 2503 mtx_lock(&pr->pr_mtx); 2504 if (pr->pr_ref > 0) 2505 return (pr); 2506 mtx_unlock(&pr->pr_mtx); 2507 } 2508 } 2509 return (NULL); 2510} 2511 2512/* 2513 * Look for the name relative to mypr. Returns a locked prison or NULL. 2514 */ 2515struct prison * 2516prison_find_name(struct prison *mypr, const char *name) 2517{ 2518 struct prison *pr, *deadpr; 2519 size_t mylen; 2520 int descend; 2521 2522 sx_assert(&allprison_lock, SX_LOCKED); 2523 mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2524 again: 2525 deadpr = NULL; 2526 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2527 if (!strcmp(pr->pr_name + mylen, name)) { 2528 mtx_lock(&pr->pr_mtx); 2529 if (pr->pr_ref > 0) { 2530 if (pr->pr_uref > 0) 2531 return (pr); 2532 deadpr = pr; 2533 } 2534 mtx_unlock(&pr->pr_mtx); 2535 } 2536 } 2537 /* There was no valid prison - perhaps there was a dying one. */ 2538 if (deadpr != NULL) { 2539 mtx_lock(&deadpr->pr_mtx); 2540 if (deadpr->pr_ref == 0) { 2541 mtx_unlock(&deadpr->pr_mtx); 2542 goto again; 2543 } 2544 } 2545 return (deadpr); 2546} 2547 2548/* 2549 * See if a prison has the specific flag set. 2550 */ 2551int 2552prison_flag(struct ucred *cred, unsigned flag) 2553{ 2554 2555 /* This is an atomic read, so no locking is necessary. */ 2556 return (cred->cr_prison->pr_flags & flag); 2557} 2558 2559int 2560prison_allow(struct ucred *cred, unsigned flag) 2561{ 2562 2563 /* This is an atomic read, so no locking is necessary. */ 2564 return (cred->cr_prison->pr_allow & flag); 2565} 2566 2567/* 2568 * Remove a prison reference. If that was the last reference, remove the 2569 * prison itself - but not in this context in case there are locks held. 
2570 */ 2571void 2572prison_free_locked(struct prison *pr) 2573{ 2574 2575 mtx_assert(&pr->pr_mtx, MA_OWNED); 2576 pr->pr_ref--; 2577 if (pr->pr_ref == 0) { 2578 mtx_unlock(&pr->pr_mtx); 2579 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 2580 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 2581 return; 2582 } 2583 mtx_unlock(&pr->pr_mtx); 2584} 2585 2586void 2587prison_free(struct prison *pr) 2588{ 2589 2590 mtx_lock(&pr->pr_mtx); 2591 prison_free_locked(pr); 2592} 2593 2594static void 2595prison_complete(void *context, int pending) 2596{ 2597 2598 prison_deref((struct prison *)context, 0); 2599} 2600 2601/* 2602 * Remove a prison reference (usually). This internal version assumes no 2603 * mutexes are held, except perhaps the prison itself. If there are no more 2604 * references, release and delist the prison. On completion, the prison lock 2605 * and the allprison lock are both unlocked. 2606 */ 2607static void 2608prison_deref(struct prison *pr, int flags) 2609{ 2610 struct prison *ppr, *tpr; 2611 2612 if (!(flags & PD_LOCKED)) 2613 mtx_lock(&pr->pr_mtx); 2614 for (;;) { 2615 if (flags & PD_DEUREF) { 2616 pr->pr_uref--; 2617 KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0")); 2618 } 2619 if (flags & PD_DEREF) 2620 pr->pr_ref--; 2621 /* If the prison still has references, nothing else to do. 
*/ 2622 if (pr->pr_ref > 0) { 2623 mtx_unlock(&pr->pr_mtx); 2624 if (flags & PD_LIST_SLOCKED) 2625 sx_sunlock(&allprison_lock); 2626 else if (flags & PD_LIST_XLOCKED) 2627 sx_xunlock(&allprison_lock); 2628 return; 2629 } 2630 2631 mtx_unlock(&pr->pr_mtx); 2632 if (flags & PD_LIST_SLOCKED) { 2633 if (!sx_try_upgrade(&allprison_lock)) { 2634 sx_sunlock(&allprison_lock); 2635 sx_xlock(&allprison_lock); 2636 } 2637 } else if (!(flags & PD_LIST_XLOCKED)) 2638 sx_xlock(&allprison_lock); 2639 2640 TAILQ_REMOVE(&allprison, pr, pr_list); 2641 LIST_REMOVE(pr, pr_sibling); 2642 ppr = pr->pr_parent; 2643 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 2644 tpr->pr_childcount--; 2645 sx_xunlock(&allprison_lock); 2646 2647#ifdef VIMAGE 2648 if (pr->pr_vnet != ppr->pr_vnet) 2649 vnet_destroy(pr->pr_vnet); 2650#endif 2651 if (pr->pr_root != NULL) 2652 vrele(pr->pr_root); 2653 mtx_destroy(&pr->pr_mtx); 2654#ifdef INET 2655 free(pr->pr_ip4, M_PRISON); 2656#endif 2657#ifdef INET6 2658 free(pr->pr_ip6, M_PRISON); 2659#endif 2660 if (pr->pr_cpuset != NULL) 2661 cpuset_rel(pr->pr_cpuset); 2662 osd_jail_exit(pr); 2663#ifdef RACCT 2664 prison_racct_detach(pr); 2665#endif 2666 free(pr, M_PRISON); 2667 2668 /* Removing a prison frees a reference on its parent. 
*/ 2669 pr = ppr; 2670 mtx_lock(&pr->pr_mtx); 2671 flags = PD_DEREF | PD_DEUREF; 2672 } 2673} 2674 2675void 2676prison_hold_locked(struct prison *pr) 2677{ 2678 2679 mtx_assert(&pr->pr_mtx, MA_OWNED); 2680 KASSERT(pr->pr_ref > 0, 2681 ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 2682 pr->pr_ref++; 2683} 2684 2685void 2686prison_hold(struct prison *pr) 2687{ 2688 2689 mtx_lock(&pr->pr_mtx); 2690 prison_hold_locked(pr); 2691 mtx_unlock(&pr->pr_mtx); 2692} 2693 2694void 2695prison_proc_hold(struct prison *pr) 2696{ 2697 2698 mtx_lock(&pr->pr_mtx); 2699 KASSERT(pr->pr_uref > 0, 2700 ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 2701 pr->pr_uref++; 2702 mtx_unlock(&pr->pr_mtx); 2703} 2704 2705void 2706prison_proc_free(struct prison *pr) 2707{ 2708 2709 mtx_lock(&pr->pr_mtx); 2710 KASSERT(pr->pr_uref > 0, 2711 ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 2712 prison_deref(pr, PD_DEUREF | PD_LOCKED); 2713} 2714 2715 2716#ifdef INET 2717/* 2718 * Restrict a prison's IP address list with its parent's, possibly replacing 2719 * it. Return true if the replacement buffer was used (or would have been). 2720 */ 2721static int 2722prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 2723{ 2724 int ii, ij, used; 2725 struct prison *ppr; 2726 2727 ppr = pr->pr_parent; 2728 if (!(pr->pr_flags & PR_IP4_USER)) { 2729 /* This has no user settings, so just copy the parent's list. */ 2730 if (pr->pr_ip4s < ppr->pr_ip4s) { 2731 /* 2732 * There's no room for the parent's list. Use the 2733 * new list buffer, which is assumed to be big enough 2734 * (if it was passed). If there's no buffer, try to 2735 * allocate one. 
2736 */ 2737 used = 1; 2738 if (newip4 == NULL) { 2739 newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 2740 M_PRISON, M_NOWAIT); 2741 if (newip4 != NULL) 2742 used = 0; 2743 } 2744 if (newip4 != NULL) { 2745 bcopy(ppr->pr_ip4, newip4, 2746 ppr->pr_ip4s * sizeof(*newip4)); 2747 free(pr->pr_ip4, M_PRISON); 2748 pr->pr_ip4 = newip4; 2749 pr->pr_ip4s = ppr->pr_ip4s; 2750 } 2751 return (used); 2752 } 2753 pr->pr_ip4s = ppr->pr_ip4s; 2754 if (pr->pr_ip4s > 0) 2755 bcopy(ppr->pr_ip4, pr->pr_ip4, 2756 pr->pr_ip4s * sizeof(*newip4)); 2757 else if (pr->pr_ip4 != NULL) { 2758 free(pr->pr_ip4, M_PRISON); 2759 pr->pr_ip4 = NULL; 2760 } 2761 } else if (pr->pr_ip4s > 0) { 2762 /* Remove addresses that aren't in the parent. */ 2763 for (ij = 0; ij < ppr->pr_ip4s; ij++) 2764 if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 2765 break; 2766 if (ij < ppr->pr_ip4s) 2767 ii = 1; 2768 else { 2769 bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 2770 --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2771 ii = 0; 2772 } 2773 for (ij = 1; ii < pr->pr_ip4s; ) { 2774 if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 2775 ii++; 2776 continue; 2777 } 2778 switch (ij >= ppr->pr_ip4s ? -1 : 2779 qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 2780 case -1: 2781 bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 2782 (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 2783 break; 2784 case 0: 2785 ii++; 2786 ij++; 2787 break; 2788 case 1: 2789 ij++; 2790 break; 2791 } 2792 } 2793 if (pr->pr_ip4s == 0) { 2794 pr->pr_flags |= PR_IP4_DISABLE; 2795 free(pr->pr_ip4, M_PRISON); 2796 pr->pr_ip4 = NULL; 2797 } 2798 } 2799 return (0); 2800} 2801 2802/* 2803 * Pass back primary IPv4 address of this jail. 2804 * 2805 * If not restricted return success but do not alter the address. Caller has 2806 * to make sure to initialize it correctly (e.g. INADDR_ANY). 2807 * 2808 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2809 * Address returned in NBO. 
2810 */ 2811int 2812prison_get_ip4(struct ucred *cred, struct in_addr *ia) 2813{ 2814 struct prison *pr; 2815 2816 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2817 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2818 2819 pr = cred->cr_prison; 2820 if (!(pr->pr_flags & PR_IP4)) 2821 return (0); 2822 mtx_lock(&pr->pr_mtx); 2823 if (!(pr->pr_flags & PR_IP4)) { 2824 mtx_unlock(&pr->pr_mtx); 2825 return (0); 2826 } 2827 if (pr->pr_ip4 == NULL) { 2828 mtx_unlock(&pr->pr_mtx); 2829 return (EAFNOSUPPORT); 2830 } 2831 2832 ia->s_addr = pr->pr_ip4[0].s_addr; 2833 mtx_unlock(&pr->pr_mtx); 2834 return (0); 2835} 2836 2837/* 2838 * Return 1 if we should do proper source address selection or are not jailed. 2839 * We will return 0 if we should bypass source address selection in favour 2840 * of the primary jail IPv4 address. Only in this case *ia will be updated and 2841 * returned in NBO. 2842 * Return EAFNOSUPPORT, in case this jail does not allow IPv4. 2843 */ 2844int 2845prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) 2846{ 2847 struct prison *pr; 2848 struct in_addr lia; 2849 int error; 2850 2851 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2852 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2853 2854 if (!jailed(cred)) 2855 return (1); 2856 2857 pr = cred->cr_prison; 2858 if (pr->pr_flags & PR_IP4_SADDRSEL) 2859 return (1); 2860 2861 lia.s_addr = INADDR_ANY; 2862 error = prison_get_ip4(cred, &lia); 2863 if (error) 2864 return (error); 2865 if (lia.s_addr == INADDR_ANY) 2866 return (1); 2867 2868 ia->s_addr = lia.s_addr; 2869 return (0); 2870} 2871 2872/* 2873 * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2874 */ 2875int 2876prison_equal_ip4(struct prison *pr1, struct prison *pr2) 2877{ 2878 2879 if (pr1 == pr2) 2880 return (1); 2881 2882 /* 2883 * No need to lock since the PR_IP4_USER flag can't be altered for 2884 * existing prisons. 
2885 */ 2886 while (pr1 != &prison0 && 2887#ifdef VIMAGE 2888 !(pr1->pr_flags & PR_VNET) && 2889#endif 2890 !(pr1->pr_flags & PR_IP4_USER)) 2891 pr1 = pr1->pr_parent; 2892 while (pr2 != &prison0 && 2893#ifdef VIMAGE 2894 !(pr2->pr_flags & PR_VNET) && 2895#endif 2896 !(pr2->pr_flags & PR_IP4_USER)) 2897 pr2 = pr2->pr_parent; 2898 return (pr1 == pr2); 2899} 2900 2901/* 2902 * Make sure our (source) address is set to something meaningful to this 2903 * jail. 2904 * 2905 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2906 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2907 * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 2908 */ 2909int 2910prison_local_ip4(struct ucred *cred, struct in_addr *ia) 2911{ 2912 struct prison *pr; 2913 struct in_addr ia0; 2914 int error; 2915 2916 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2917 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2918 2919 pr = cred->cr_prison; 2920 if (!(pr->pr_flags & PR_IP4)) 2921 return (0); 2922 mtx_lock(&pr->pr_mtx); 2923 if (!(pr->pr_flags & PR_IP4)) { 2924 mtx_unlock(&pr->pr_mtx); 2925 return (0); 2926 } 2927 if (pr->pr_ip4 == NULL) { 2928 mtx_unlock(&pr->pr_mtx); 2929 return (EAFNOSUPPORT); 2930 } 2931 2932 ia0.s_addr = ntohl(ia->s_addr); 2933 if (ia0.s_addr == INADDR_LOOPBACK) { 2934 ia->s_addr = pr->pr_ip4[0].s_addr; 2935 mtx_unlock(&pr->pr_mtx); 2936 return (0); 2937 } 2938 2939 if (ia0.s_addr == INADDR_ANY) { 2940 /* 2941 * In case there is only 1 IPv4 address, bind directly. 2942 */ 2943 if (pr->pr_ip4s == 1) 2944 ia->s_addr = pr->pr_ip4[0].s_addr; 2945 mtx_unlock(&pr->pr_mtx); 2946 return (0); 2947 } 2948 2949 error = _prison_check_ip4(pr, ia); 2950 mtx_unlock(&pr->pr_mtx); 2951 return (error); 2952} 2953 2954/* 2955 * Rewrite destination address in case we will connect to loopback address. 2956 * 2957 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 
2958 * Address passed in in NBO and returned in NBO. 2959 */ 2960int 2961prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 2962{ 2963 struct prison *pr; 2964 2965 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2966 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2967 2968 pr = cred->cr_prison; 2969 if (!(pr->pr_flags & PR_IP4)) 2970 return (0); 2971 mtx_lock(&pr->pr_mtx); 2972 if (!(pr->pr_flags & PR_IP4)) { 2973 mtx_unlock(&pr->pr_mtx); 2974 return (0); 2975 } 2976 if (pr->pr_ip4 == NULL) { 2977 mtx_unlock(&pr->pr_mtx); 2978 return (EAFNOSUPPORT); 2979 } 2980 2981 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 2982 ia->s_addr = pr->pr_ip4[0].s_addr; 2983 mtx_unlock(&pr->pr_mtx); 2984 return (0); 2985 } 2986 2987 /* 2988 * Return success because nothing had to be changed. 2989 */ 2990 mtx_unlock(&pr->pr_mtx); 2991 return (0); 2992} 2993 2994/* 2995 * Check if given address belongs to the jail referenced by cred/prison. 2996 * 2997 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2998 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2999 * doesn't allow IPv4. Address passed in in NBO. 3000 */ 3001static int 3002_prison_check_ip4(struct prison *pr, struct in_addr *ia) 3003{ 3004 int i, a, z, d; 3005 3006 /* 3007 * Check the primary IP. 3008 */ 3009 if (pr->pr_ip4[0].s_addr == ia->s_addr) 3010 return (0); 3011 3012 /* 3013 * All the other IPs are sorted so we can do a binary search. 
3014 */ 3015 a = 0; 3016 z = pr->pr_ip4s - 2; 3017 while (a <= z) { 3018 i = (a + z) / 2; 3019 d = qcmp_v4(&pr->pr_ip4[i+1], ia); 3020 if (d > 0) 3021 z = i - 1; 3022 else if (d < 0) 3023 a = i + 1; 3024 else 3025 return (0); 3026 } 3027 3028 return (EADDRNOTAVAIL); 3029} 3030 3031int 3032prison_check_ip4(struct ucred *cred, struct in_addr *ia) 3033{ 3034 struct prison *pr; 3035 int error; 3036 3037 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3038 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 3039 3040 pr = cred->cr_prison; 3041 if (!(pr->pr_flags & PR_IP4)) 3042 return (0); 3043 mtx_lock(&pr->pr_mtx); 3044 if (!(pr->pr_flags & PR_IP4)) { 3045 mtx_unlock(&pr->pr_mtx); 3046 return (0); 3047 } 3048 if (pr->pr_ip4 == NULL) { 3049 mtx_unlock(&pr->pr_mtx); 3050 return (EAFNOSUPPORT); 3051 } 3052 3053 error = _prison_check_ip4(pr, ia); 3054 mtx_unlock(&pr->pr_mtx); 3055 return (error); 3056} 3057#endif 3058 3059#ifdef INET6 3060static int 3061prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) 3062{ 3063 int ii, ij, used; 3064 struct prison *ppr; 3065 3066 ppr = pr->pr_parent; 3067 if (!(pr->pr_flags & PR_IP6_USER)) { 3068 /* This has no user settings, so just copy the parent's list. */ 3069 if (pr->pr_ip6s < ppr->pr_ip6s) { 3070 /* 3071 * There's no room for the parent's list. Use the 3072 * new list buffer, which is assumed to be big enough 3073 * (if it was passed). If there's no buffer, try to 3074 * allocate one. 
3075 */ 3076 used = 1; 3077 if (newip6 == NULL) { 3078 newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), 3079 M_PRISON, M_NOWAIT); 3080 if (newip6 != NULL) 3081 used = 0; 3082 } 3083 if (newip6 != NULL) { 3084 bcopy(ppr->pr_ip6, newip6, 3085 ppr->pr_ip6s * sizeof(*newip6)); 3086 free(pr->pr_ip6, M_PRISON); 3087 pr->pr_ip6 = newip6; 3088 pr->pr_ip6s = ppr->pr_ip6s; 3089 } 3090 return (used); 3091 } 3092 pr->pr_ip6s = ppr->pr_ip6s; 3093 if (pr->pr_ip6s > 0) 3094 bcopy(ppr->pr_ip6, pr->pr_ip6, 3095 pr->pr_ip6s * sizeof(*newip6)); 3096 else if (pr->pr_ip6 != NULL) { 3097 free(pr->pr_ip6, M_PRISON); 3098 pr->pr_ip6 = NULL; 3099 } 3100 } else if (pr->pr_ip6s > 0) { 3101 /* Remove addresses that aren't in the parent. */ 3102 for (ij = 0; ij < ppr->pr_ip6s; ij++) 3103 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], 3104 &ppr->pr_ip6[ij])) 3105 break; 3106 if (ij < ppr->pr_ip6s) 3107 ii = 1; 3108 else { 3109 bcopy(pr->pr_ip6 + 1, pr->pr_ip6, 3110 --pr->pr_ip6s * sizeof(*pr->pr_ip6)); 3111 ii = 0; 3112 } 3113 for (ij = 1; ii < pr->pr_ip6s; ) { 3114 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], 3115 &ppr->pr_ip6[0])) { 3116 ii++; 3117 continue; 3118 } 3119 switch (ij >= ppr->pr_ip6s ? -1 : 3120 qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { 3121 case -1: 3122 bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, 3123 (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); 3124 break; 3125 case 0: 3126 ii++; 3127 ij++; 3128 break; 3129 case 1: 3130 ij++; 3131 break; 3132 } 3133 } 3134 if (pr->pr_ip6s == 0) { 3135 pr->pr_flags |= PR_IP6_DISABLE; 3136 free(pr->pr_ip6, M_PRISON); 3137 pr->pr_ip6 = NULL; 3138 } 3139 } 3140 return 0; 3141} 3142 3143/* 3144 * Pass back primary IPv6 address for this jail. 3145 * 3146 * If not restricted return success but do not alter the address. Caller has 3147 * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 3148 * 3149 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 
3150 */ 3151int 3152prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 3153{ 3154 struct prison *pr; 3155 3156 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3157 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3158 3159 pr = cred->cr_prison; 3160 if (!(pr->pr_flags & PR_IP6)) 3161 return (0); 3162 mtx_lock(&pr->pr_mtx); 3163 if (!(pr->pr_flags & PR_IP6)) { 3164 mtx_unlock(&pr->pr_mtx); 3165 return (0); 3166 } 3167 if (pr->pr_ip6 == NULL) { 3168 mtx_unlock(&pr->pr_mtx); 3169 return (EAFNOSUPPORT); 3170 } 3171 3172 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3173 mtx_unlock(&pr->pr_mtx); 3174 return (0); 3175} 3176 3177/* 3178 * Return 1 if we should do proper source address selection or are not jailed. 3179 * We will return 0 if we should bypass source address selection in favour 3180 * of the primary jail IPv6 address. Only in this case *ia will be updated and 3181 * returned in NBO. 3182 * Return EAFNOSUPPORT, in case this jail does not allow IPv6. 3183 */ 3184int 3185prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6) 3186{ 3187 struct prison *pr; 3188 struct in6_addr lia6; 3189 int error; 3190 3191 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3192 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3193 3194 if (!jailed(cred)) 3195 return (1); 3196 3197 pr = cred->cr_prison; 3198 if (pr->pr_flags & PR_IP6_SADDRSEL) 3199 return (1); 3200 3201 lia6 = in6addr_any; 3202 error = prison_get_ip6(cred, &lia6); 3203 if (error) 3204 return (error); 3205 if (IN6_IS_ADDR_UNSPECIFIED(&lia6)) 3206 return (1); 3207 3208 bcopy(&lia6, ia6, sizeof(struct in6_addr)); 3209 return (0); 3210} 3211 3212/* 3213 * Return true if pr1 and pr2 have the same IPv6 address restrictions. 
3214 */ 3215int 3216prison_equal_ip6(struct prison *pr1, struct prison *pr2) 3217{ 3218 3219 if (pr1 == pr2) 3220 return (1); 3221 3222 while (pr1 != &prison0 && 3223#ifdef VIMAGE 3224 !(pr1->pr_flags & PR_VNET) && 3225#endif 3226 !(pr1->pr_flags & PR_IP6_USER)) 3227 pr1 = pr1->pr_parent; 3228 while (pr2 != &prison0 && 3229#ifdef VIMAGE 3230 !(pr2->pr_flags & PR_VNET) && 3231#endif 3232 !(pr2->pr_flags & PR_IP6_USER)) 3233 pr2 = pr2->pr_parent; 3234 return (pr1 == pr2); 3235} 3236 3237/* 3238 * Make sure our (source) address is set to something meaningful to this jail. 3239 * 3240 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 3241 * when needed while binding. 3242 * 3243 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3244 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3245 * doesn't allow IPv6. 3246 */ 3247int 3248prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 3249{ 3250 struct prison *pr; 3251 int error; 3252 3253 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3254 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3255 3256 pr = cred->cr_prison; 3257 if (!(pr->pr_flags & PR_IP6)) 3258 return (0); 3259 mtx_lock(&pr->pr_mtx); 3260 if (!(pr->pr_flags & PR_IP6)) { 3261 mtx_unlock(&pr->pr_mtx); 3262 return (0); 3263 } 3264 if (pr->pr_ip6 == NULL) { 3265 mtx_unlock(&pr->pr_mtx); 3266 return (EAFNOSUPPORT); 3267 } 3268 3269 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3270 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3271 mtx_unlock(&pr->pr_mtx); 3272 return (0); 3273 } 3274 3275 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 3276 /* 3277 * In case there is only 1 IPv6 address, and v6only is true, 3278 * then bind directly. 
3279 */ 3280 if (v6only != 0 && pr->pr_ip6s == 1) 3281 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3282 mtx_unlock(&pr->pr_mtx); 3283 return (0); 3284 } 3285 3286 error = _prison_check_ip6(pr, ia6); 3287 mtx_unlock(&pr->pr_mtx); 3288 return (error); 3289} 3290 3291/* 3292 * Rewrite destination address in case we will connect to loopback address. 3293 * 3294 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 3295 */ 3296int 3297prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 3298{ 3299 struct prison *pr; 3300 3301 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3302 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3303 3304 pr = cred->cr_prison; 3305 if (!(pr->pr_flags & PR_IP6)) 3306 return (0); 3307 mtx_lock(&pr->pr_mtx); 3308 if (!(pr->pr_flags & PR_IP6)) { 3309 mtx_unlock(&pr->pr_mtx); 3310 return (0); 3311 } 3312 if (pr->pr_ip6 == NULL) { 3313 mtx_unlock(&pr->pr_mtx); 3314 return (EAFNOSUPPORT); 3315 } 3316 3317 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3318 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3319 mtx_unlock(&pr->pr_mtx); 3320 return (0); 3321 } 3322 3323 /* 3324 * Return success because nothing had to be changed. 3325 */ 3326 mtx_unlock(&pr->pr_mtx); 3327 return (0); 3328} 3329 3330/* 3331 * Check if given address belongs to the jail referenced by cred/prison. 3332 * 3333 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3334 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3335 * doesn't allow IPv6. 3336 */ 3337static int 3338_prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 3339{ 3340 int i, a, z, d; 3341 3342 /* 3343 * Check the primary IP. 3344 */ 3345 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3346 return (0); 3347 3348 /* 3349 * All the other IPs are sorted so we can do a binary search. 
3350 */ 3351 a = 0; 3352 z = pr->pr_ip6s - 2; 3353 while (a <= z) { 3354 i = (a + z) / 2; 3355 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3356 if (d > 0) 3357 z = i - 1; 3358 else if (d < 0) 3359 a = i + 1; 3360 else 3361 return (0); 3362 } 3363 3364 return (EADDRNOTAVAIL); 3365} 3366 3367int 3368prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3369{ 3370 struct prison *pr; 3371 int error; 3372 3373 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3374 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3375 3376 pr = cred->cr_prison; 3377 if (!(pr->pr_flags & PR_IP6)) 3378 return (0); 3379 mtx_lock(&pr->pr_mtx); 3380 if (!(pr->pr_flags & PR_IP6)) { 3381 mtx_unlock(&pr->pr_mtx); 3382 return (0); 3383 } 3384 if (pr->pr_ip6 == NULL) { 3385 mtx_unlock(&pr->pr_mtx); 3386 return (EAFNOSUPPORT); 3387 } 3388 3389 error = _prison_check_ip6(pr, ia6); 3390 mtx_unlock(&pr->pr_mtx); 3391 return (error); 3392} 3393#endif 3394 3395/* 3396 * Check if a jail supports the given address family. 3397 * 3398 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3399 * if not. 3400 */ 3401int 3402prison_check_af(struct ucred *cred, int af) 3403{ 3404 struct prison *pr; 3405 int error; 3406 3407 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3408 3409 pr = cred->cr_prison; 3410#ifdef VIMAGE 3411 /* Prisons with their own network stack are not limited. 
*/ 3412 if (prison_owns_vnet(cred)) 3413 return (0); 3414#endif 3415 3416 error = 0; 3417 switch (af) 3418 { 3419#ifdef INET 3420 case AF_INET: 3421 if (pr->pr_flags & PR_IP4) 3422 { 3423 mtx_lock(&pr->pr_mtx); 3424 if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3425 error = EAFNOSUPPORT; 3426 mtx_unlock(&pr->pr_mtx); 3427 } 3428 break; 3429#endif 3430#ifdef INET6 3431 case AF_INET6: 3432 if (pr->pr_flags & PR_IP6) 3433 { 3434 mtx_lock(&pr->pr_mtx); 3435 if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3436 error = EAFNOSUPPORT; 3437 mtx_unlock(&pr->pr_mtx); 3438 } 3439 break; 3440#endif 3441 case AF_LOCAL: 3442 case AF_ROUTE: 3443 break; 3444 default: 3445 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3446 error = EAFNOSUPPORT; 3447 } 3448 return (error); 3449} 3450 3451/* 3452 * Check if given address belongs to the jail referenced by cred (wrapper to 3453 * prison_check_ip[46]). 3454 * 3455 * Returns 0 if jail doesn't restrict the address family or if address belongs 3456 * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3457 * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 
3458 */ 3459int 3460prison_if(struct ucred *cred, struct sockaddr *sa) 3461{ 3462#ifdef INET 3463 struct sockaddr_in *sai; 3464#endif 3465#ifdef INET6 3466 struct sockaddr_in6 *sai6; 3467#endif 3468 int error; 3469 3470 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3471 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3472 3473#ifdef VIMAGE 3474 if (prison_owns_vnet(cred)) 3475 return (0); 3476#endif 3477 3478 error = 0; 3479 switch (sa->sa_family) 3480 { 3481#ifdef INET 3482 case AF_INET: 3483 sai = (struct sockaddr_in *)sa; 3484 error = prison_check_ip4(cred, &sai->sin_addr); 3485 break; 3486#endif 3487#ifdef INET6 3488 case AF_INET6: 3489 sai6 = (struct sockaddr_in6 *)sa; 3490 error = prison_check_ip6(cred, &sai6->sin6_addr); 3491 break; 3492#endif 3493 default: 3494 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3495 error = EAFNOSUPPORT; 3496 } 3497 return (error); 3498} 3499 3500/* 3501 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 3502 */ 3503int 3504prison_check(struct ucred *cred1, struct ucred *cred2) 3505{ 3506 3507 return ((cred1->cr_prison == cred2->cr_prison || 3508 prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3509} 3510 3511/* 3512 * Return 1 if p2 is a child of p1, otherwise 0. 3513 */ 3514int 3515prison_ischild(struct prison *pr1, struct prison *pr2) 3516{ 3517 3518 for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3519 if (pr1 == pr2) 3520 return (1); 3521 return (0); 3522} 3523 3524/* 3525 * Return 1 if the passed credential is in a jail, otherwise 0. 3526 */ 3527int 3528jailed(struct ucred *cred) 3529{ 3530 3531 return (cred->cr_prison != &prison0); 3532} 3533 3534/* 3535 * Return 1 if the passed credential is in a jail and that jail does not 3536 * have its own virtual network stack, otherwise 0. 
3537 */ 3538int 3539jailed_without_vnet(struct ucred *cred) 3540{ 3541 3542 if (!jailed(cred)) 3543 return (0); 3544#ifdef VIMAGE 3545 if (prison_owns_vnet(cred)) 3546 return (0); 3547#endif 3548 3549 return (1); 3550} 3551 3552/* 3553 * Return the correct hostname (domainname, et al) for the passed credential. 3554 */ 3555void 3556getcredhostname(struct ucred *cred, char *buf, size_t size) 3557{ 3558 struct prison *pr; 3559 3560 /* 3561 * A NULL credential can be used to shortcut to the physical 3562 * system's hostname. 3563 */ 3564 pr = (cred != NULL) ? cred->cr_prison : &prison0; 3565 mtx_lock(&pr->pr_mtx); 3566 strlcpy(buf, pr->pr_hostname, size); 3567 mtx_unlock(&pr->pr_mtx); 3568} 3569 3570void 3571getcreddomainname(struct ucred *cred, char *buf, size_t size) 3572{ 3573 3574 mtx_lock(&cred->cr_prison->pr_mtx); 3575 strlcpy(buf, cred->cr_prison->pr_domainname, size); 3576 mtx_unlock(&cred->cr_prison->pr_mtx); 3577} 3578 3579void 3580getcredhostuuid(struct ucred *cred, char *buf, size_t size) 3581{ 3582 3583 mtx_lock(&cred->cr_prison->pr_mtx); 3584 strlcpy(buf, cred->cr_prison->pr_hostuuid, size); 3585 mtx_unlock(&cred->cr_prison->pr_mtx); 3586} 3587 3588void 3589getcredhostid(struct ucred *cred, unsigned long *hostid) 3590{ 3591 3592 mtx_lock(&cred->cr_prison->pr_mtx); 3593 *hostid = cred->cr_prison->pr_hostid; 3594 mtx_unlock(&cred->cr_prison->pr_mtx); 3595} 3596 3597#ifdef VIMAGE 3598/* 3599 * Determine whether the prison represented by cred owns 3600 * its vnet rather than having it inherited. 3601 * 3602 * Returns 1 in case the prison owns the vnet, 0 otherwise. 3603 */ 3604int 3605prison_owns_vnet(struct ucred *cred) 3606{ 3607 3608 /* 3609 * vnets cannot be added/removed after jail creation, 3610 * so no need to lock here. 3611 */ 3612 return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0); 3613} 3614#endif 3615 3616/* 3617 * Determine whether the subject represented by cred can "see" 3618 * status of a mount point. 
3619 * Returns: 0 for permitted, ENOENT otherwise. 3620 * XXX: This function should be called cr_canseemount() and should be 3621 * placed in kern_prot.c. 3622 */ 3623int 3624prison_canseemount(struct ucred *cred, struct mount *mp) 3625{ 3626 struct prison *pr; 3627 struct statfs *sp; 3628 size_t len; 3629 3630 pr = cred->cr_prison; 3631 if (pr->pr_enforce_statfs == 0) 3632 return (0); 3633 if (pr->pr_root->v_mount == mp) 3634 return (0); 3635 if (pr->pr_enforce_statfs == 2) 3636 return (ENOENT); 3637 /* 3638 * If jail's chroot directory is set to "/" we should be able to see 3639 * all mount-points from inside a jail. 3640 * This is ugly check, but this is the only situation when jail's 3641 * directory ends with '/'. 3642 */ 3643 if (strcmp(pr->pr_path, "/") == 0) 3644 return (0); 3645 len = strlen(pr->pr_path); 3646 sp = &mp->mnt_stat; 3647 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3648 return (ENOENT); 3649 /* 3650 * Be sure that we don't have situation where jail's root directory 3651 * is "/some/path" and mount point is "/some/pathpath". 3652 */ 3653 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3654 return (ENOENT); 3655 return (0); 3656} 3657 3658void 3659prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3660{ 3661 char jpath[MAXPATHLEN]; 3662 struct prison *pr; 3663 size_t len; 3664 3665 pr = cred->cr_prison; 3666 if (pr->pr_enforce_statfs == 0) 3667 return; 3668 if (prison_canseemount(cred, mp) != 0) { 3669 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3670 strlcpy(sp->f_mntonname, "[restricted]", 3671 sizeof(sp->f_mntonname)); 3672 return; 3673 } 3674 if (pr->pr_root->v_mount == mp) { 3675 /* 3676 * Clear current buffer data, so we are sure nothing from 3677 * the valid path left there. 
3678 */ 3679 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3680 *sp->f_mntonname = '/'; 3681 return; 3682 } 3683 /* 3684 * If jail's chroot directory is set to "/" we should be able to see 3685 * all mount-points from inside a jail. 3686 */ 3687 if (strcmp(pr->pr_path, "/") == 0) 3688 return; 3689 len = strlen(pr->pr_path); 3690 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3691 /* 3692 * Clear current buffer data, so we are sure nothing from 3693 * the valid path left there. 3694 */ 3695 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3696 if (*jpath == '\0') { 3697 /* Should never happen. */ 3698 *sp->f_mntonname = '/'; 3699 } else { 3700 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3701 } 3702} 3703 3704/* 3705 * Check with permission for a specific privilege is granted within jail. We 3706 * have a specific list of accepted privileges; the rest are denied. 3707 */ 3708int 3709prison_priv_check(struct ucred *cred, int priv) 3710{ 3711 3712 if (!jailed(cred)) 3713 return (0); 3714 3715#ifdef VIMAGE 3716 /* 3717 * Privileges specific to prisons with a virtual network stack. 3718 * There might be a duplicate entry here in case the privilege 3719 * is only granted conditionally in the legacy jail case. 3720 */ 3721 switch (priv) { 3722#ifdef notyet 3723 /* 3724 * NFS-specific privileges. 3725 */ 3726 case PRIV_NFS_DAEMON: 3727 case PRIV_NFS_LOCKD: 3728#endif 3729 /* 3730 * Network stack privileges. 3731 */ 3732 case PRIV_NET_BRIDGE: 3733 case PRIV_NET_GRE: 3734 case PRIV_NET_BPF: 3735 case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. 
*/ 3736 case PRIV_NET_ROUTE: 3737 case PRIV_NET_TAP: 3738 case PRIV_NET_SETIFMTU: 3739 case PRIV_NET_SETIFFLAGS: 3740 case PRIV_NET_SETIFCAP: 3741 case PRIV_NET_SETIFDESCR: 3742 case PRIV_NET_SETIFNAME : 3743 case PRIV_NET_SETIFMETRIC: 3744 case PRIV_NET_SETIFPHYS: 3745 case PRIV_NET_SETIFMAC: 3746 case PRIV_NET_ADDMULTI: 3747 case PRIV_NET_DELMULTI: 3748 case PRIV_NET_HWIOCTL: 3749 case PRIV_NET_SETLLADDR: 3750 case PRIV_NET_ADDIFGROUP: 3751 case PRIV_NET_DELIFGROUP: 3752 case PRIV_NET_IFCREATE: 3753 case PRIV_NET_IFDESTROY: 3754 case PRIV_NET_ADDIFADDR: 3755 case PRIV_NET_DELIFADDR: 3756 case PRIV_NET_LAGG: 3757 case PRIV_NET_GIF: 3758 case PRIV_NET_SETIFVNET: 3759 case PRIV_NET_SETIFFIB: 3760 3761 /* 3762 * 802.11-related privileges. 3763 */ 3764 case PRIV_NET80211_GETKEY: 3765#ifdef notyet 3766 case PRIV_NET80211_MANAGE: /* XXX-BZ discuss with sam@ */ 3767#endif 3768 3769#ifdef notyet 3770 /* 3771 * AppleTalk privileges. 3772 */ 3773 case PRIV_NETATALK_RESERVEDPORT: 3774 3775 /* 3776 * ATM privileges. 3777 */ 3778 case PRIV_NETATM_CFG: 3779 case PRIV_NETATM_ADD: 3780 case PRIV_NETATM_DEL: 3781 case PRIV_NETATM_SET: 3782 3783 /* 3784 * Bluetooth privileges. 3785 */ 3786 case PRIV_NETBLUETOOTH_RAW: 3787#endif 3788 3789 /* 3790 * Netgraph and netgraph module privileges. 3791 */ 3792 case PRIV_NETGRAPH_CONTROL: 3793#ifdef notyet 3794 case PRIV_NETGRAPH_TTY: 3795#endif 3796 3797 /* 3798 * IPv4 and IPv6 privileges. 3799 */ 3800 case PRIV_NETINET_IPFW: 3801 case PRIV_NETINET_DIVERT: 3802 case PRIV_NETINET_PF: 3803 case PRIV_NETINET_DUMMYNET: 3804 case PRIV_NETINET_CARP: 3805 case PRIV_NETINET_MROUTE: 3806 case PRIV_NETINET_RAW: 3807 case PRIV_NETINET_ADDRCTRL6: 3808 case PRIV_NETINET_ND6: 3809 case PRIV_NETINET_SCOPE6: 3810 case PRIV_NETINET_ALIFETIME6: 3811 case PRIV_NETINET_IPSEC: 3812 case PRIV_NETINET_BINDANY: 3813 3814#ifdef notyet 3815 /* 3816 * IPX/SPX privileges. 
3817 */ 3818 case PRIV_NETIPX_RESERVEDPORT: 3819 case PRIV_NETIPX_RAW: 3820 3821 /* 3822 * NCP privileges. 3823 */ 3824 case PRIV_NETNCP: 3825 3826 /* 3827 * SMB privileges. 3828 */ 3829 case PRIV_NETSMB: 3830#endif 3831 3832 /* 3833 * No default: or deny here. 3834 * In case of no permit fall through to next switch(). 3835 */ 3836 if (cred->cr_prison->pr_flags & PR_VNET) 3837 return (0); 3838 } 3839#endif /* VIMAGE */ 3840 3841 switch (priv) { 3842 3843 /* 3844 * Allow ktrace privileges for root in jail. 3845 */ 3846 case PRIV_KTRACE: 3847 3848#if 0 3849 /* 3850 * Allow jailed processes to configure audit identity and 3851 * submit audit records (login, etc). In the future we may 3852 * want to further refine the relationship between audit and 3853 * jail. 3854 */ 3855 case PRIV_AUDIT_GETAUDIT: 3856 case PRIV_AUDIT_SETAUDIT: 3857 case PRIV_AUDIT_SUBMIT: 3858#endif 3859 3860 /* 3861 * Allow jailed processes to manipulate process UNIX 3862 * credentials in any way they see fit. 3863 */ 3864 case PRIV_CRED_SETUID: 3865 case PRIV_CRED_SETEUID: 3866 case PRIV_CRED_SETGID: 3867 case PRIV_CRED_SETEGID: 3868 case PRIV_CRED_SETGROUPS: 3869 case PRIV_CRED_SETREUID: 3870 case PRIV_CRED_SETREGID: 3871 case PRIV_CRED_SETRESUID: 3872 case PRIV_CRED_SETRESGID: 3873 3874 /* 3875 * Jail implements visibility constraints already, so allow 3876 * jailed root to override uid/gid-based constraints. 3877 */ 3878 case PRIV_SEEOTHERGIDS: 3879 case PRIV_SEEOTHERUIDS: 3880 3881 /* 3882 * Jail implements inter-process debugging limits already, so 3883 * allow jailed root various debugging privileges. 3884 */ 3885 case PRIV_DEBUG_DIFFCRED: 3886 case PRIV_DEBUG_SUGID: 3887 case PRIV_DEBUG_UNPRIV: 3888 3889 /* 3890 * Allow jail to set various resource limits and login 3891 * properties, and for now, exceed process resource limits. 
3892 */ 3893 case PRIV_PROC_LIMIT: 3894 case PRIV_PROC_SETLOGIN: 3895 case PRIV_PROC_SETRLIMIT: 3896 3897 /* 3898 * System V and POSIX IPC privileges are granted in jail. 3899 */ 3900 case PRIV_IPC_READ: 3901 case PRIV_IPC_WRITE: 3902 case PRIV_IPC_ADMIN: 3903 case PRIV_IPC_MSGSIZE: 3904 case PRIV_MQ_ADMIN: 3905 3906 /* 3907 * Jail operations within a jail work on child jails. 3908 */ 3909 case PRIV_JAIL_ATTACH: 3910 case PRIV_JAIL_SET: 3911 case PRIV_JAIL_REMOVE: 3912 3913 /* 3914 * Jail implements its own inter-process limits, so allow 3915 * root processes in jail to change scheduling on other 3916 * processes in the same jail. Likewise for signalling. 3917 */ 3918 case PRIV_SCHED_DIFFCRED: 3919 case PRIV_SCHED_CPUSET: 3920 case PRIV_SIGNAL_DIFFCRED: 3921 case PRIV_SIGNAL_SUGID: 3922 3923 /* 3924 * Allow jailed processes to write to sysctls marked as jail 3925 * writable. 3926 */ 3927 case PRIV_SYSCTL_WRITEJAIL: 3928 3929 /* 3930 * Allow root in jail to manage a variety of quota 3931 * properties. These should likely be conditional on a 3932 * configuration option. 3933 */ 3934 case PRIV_VFS_GETQUOTA: 3935 case PRIV_VFS_SETQUOTA: 3936 3937 /* 3938 * Since Jail relies on chroot() to implement file system 3939 * protections, grant many VFS privileges to root in jail. 3940 * Be careful to exclude mount-related and NFS-related 3941 * privileges. 3942 */ 3943 case PRIV_VFS_READ: 3944 case PRIV_VFS_WRITE: 3945 case PRIV_VFS_ADMIN: 3946 case PRIV_VFS_EXEC: 3947 case PRIV_VFS_LOOKUP: 3948 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. 
*/ 3949 case PRIV_VFS_CHFLAGS_DEV: 3950 case PRIV_VFS_CHOWN: 3951 case PRIV_VFS_CHROOT: 3952 case PRIV_VFS_RETAINSUGID: 3953 case PRIV_VFS_FCHROOT: 3954 case PRIV_VFS_LINK: 3955 case PRIV_VFS_SETGID: 3956 case PRIV_VFS_STAT: 3957 case PRIV_VFS_STICKYFILE: 3958 3959 /* 3960 * As in the non-jail case, non-root users are expected to be 3961 * able to read kernel/phyiscal memory (provided /dev/[k]mem 3962 * exists in the jail and they have permission to access it). 3963 */ 3964 case PRIV_KMEM_READ: 3965 return (0); 3966 3967 /* 3968 * Depending on the global setting, allow privilege of 3969 * setting system flags. 3970 */ 3971 case PRIV_VFS_SYSFLAGS: 3972 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3973 return (0); 3974 else 3975 return (EPERM); 3976 3977 /* 3978 * Depending on the global setting, allow privilege of 3979 * mounting/unmounting file systems. 3980 */ 3981 case PRIV_VFS_MOUNT: 3982 case PRIV_VFS_UNMOUNT: 3983 case PRIV_VFS_MOUNT_NONUSER: 3984 case PRIV_VFS_MOUNT_OWNER: 3985 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT && 3986 cred->cr_prison->pr_enforce_statfs < 2) 3987 return (0); 3988 else 3989 return (EPERM); 3990 3991 /* 3992 * Allow jailed root to bind reserved ports and reuse in-use 3993 * ports. 3994 */ 3995 case PRIV_NETINET_RESERVEDPORT: 3996 case PRIV_NETINET_REUSEPORT: 3997 return (0); 3998 3999 /* 4000 * Allow jailed root to set certian IPv4/6 (option) headers. 4001 */ 4002 case PRIV_NETINET_SETHDROPTS: 4003 return (0); 4004 4005 /* 4006 * Conditionally allow creating raw sockets in jail. 4007 */ 4008 case PRIV_NETINET_RAW: 4009 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 4010 return (0); 4011 else 4012 return (EPERM); 4013 4014 /* 4015 * Since jail implements its own visibility limits on netstat 4016 * sysctls, allow getcred. This allows identd to work in 4017 * jail. 4018 */ 4019 case PRIV_NETINET_GETCRED: 4020 return (0); 4021 4022 /* 4023 * Allow jailed root to set loginclass. 
4024 */ 4025 case PRIV_PROC_SETLOGINCLASS: 4026 return (0); 4027 4028 default: 4029 /* 4030 * In all remaining cases, deny the privilege request. This 4031 * includes almost all network privileges, many system 4032 * configuration privileges. 4033 */ 4034 return (EPERM); 4035 } 4036} 4037 4038/* 4039 * Return the part of pr2's name that is relative to pr1, or the whole name 4040 * if it does not directly follow. 4041 */ 4042 4043char * 4044prison_name(struct prison *pr1, struct prison *pr2) 4045{ 4046 char *name; 4047 4048 /* Jails see themselves as "0" (if they see themselves at all). */ 4049 if (pr1 == pr2) 4050 return "0"; 4051 name = pr2->pr_name; 4052 if (prison_ischild(pr1, pr2)) { 4053 /* 4054 * pr1 isn't locked (and allprison_lock may not be either) 4055 * so its length can't be counted on. But the number of dots 4056 * can be counted on - and counted. 4057 */ 4058 for (; pr1 != &prison0; pr1 = pr1->pr_parent) 4059 name = strchr(name, '.') + 1; 4060 } 4061 return (name); 4062} 4063 4064/* 4065 * Return the part of pr2's path that is relative to pr1, or the whole path 4066 * if it does not directly follow. 4067 */ 4068static char * 4069prison_path(struct prison *pr1, struct prison *pr2) 4070{ 4071 char *path1, *path2; 4072 int len1; 4073 4074 path1 = pr1->pr_path; 4075 path2 = pr2->pr_path; 4076 if (!strcmp(path1, "/")) 4077 return (path2); 4078 len1 = strlen(path1); 4079 if (strncmp(path1, path2, len1)) 4080 return (path2); 4081 if (path2[len1] == '\0') 4082 return "/"; 4083 if (path2[len1] == '/') 4084 return (path2 + len1); 4085 return (path2); 4086} 4087 4088 4089/* 4090 * Jail-related sysctls. 
4091 */ 4092static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 4093 "Jails"); 4094 4095static int 4096sysctl_jail_list(SYSCTL_HANDLER_ARGS) 4097{ 4098 struct xprison *xp; 4099 struct prison *pr, *cpr; 4100#ifdef INET 4101 struct in_addr *ip4 = NULL; 4102 int ip4s = 0; 4103#endif 4104#ifdef INET6 4105 struct in6_addr *ip6 = NULL; 4106 int ip6s = 0; 4107#endif 4108 int descend, error; 4109 4110 xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 4111 pr = req->td->td_ucred->cr_prison; 4112 error = 0; 4113 sx_slock(&allprison_lock); 4114 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 4115#if defined(INET) || defined(INET6) 4116 again: 4117#endif 4118 mtx_lock(&cpr->pr_mtx); 4119#ifdef INET 4120 if (cpr->pr_ip4s > 0) { 4121 if (ip4s < cpr->pr_ip4s) { 4122 ip4s = cpr->pr_ip4s; 4123 mtx_unlock(&cpr->pr_mtx); 4124 ip4 = realloc(ip4, ip4s * 4125 sizeof(struct in_addr), M_TEMP, M_WAITOK); 4126 goto again; 4127 } 4128 bcopy(cpr->pr_ip4, ip4, 4129 cpr->pr_ip4s * sizeof(struct in_addr)); 4130 } 4131#endif 4132#ifdef INET6 4133 if (cpr->pr_ip6s > 0) { 4134 if (ip6s < cpr->pr_ip6s) { 4135 ip6s = cpr->pr_ip6s; 4136 mtx_unlock(&cpr->pr_mtx); 4137 ip6 = realloc(ip6, ip6s * 4138 sizeof(struct in6_addr), M_TEMP, M_WAITOK); 4139 goto again; 4140 } 4141 bcopy(cpr->pr_ip6, ip6, 4142 cpr->pr_ip6s * sizeof(struct in6_addr)); 4143 } 4144#endif 4145 if (cpr->pr_ref == 0) { 4146 mtx_unlock(&cpr->pr_mtx); 4147 continue; 4148 } 4149 bzero(xp, sizeof(*xp)); 4150 xp->pr_version = XPRISON_VERSION; 4151 xp->pr_id = cpr->pr_id; 4152 xp->pr_state = cpr->pr_uref > 0 4153 ? 
PRISON_STATE_ALIVE : PRISON_STATE_DYING; 4154 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); 4155 strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); 4156 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); 4157#ifdef INET 4158 xp->pr_ip4s = cpr->pr_ip4s; 4159#endif 4160#ifdef INET6 4161 xp->pr_ip6s = cpr->pr_ip6s; 4162#endif 4163 mtx_unlock(&cpr->pr_mtx); 4164 error = SYSCTL_OUT(req, xp, sizeof(*xp)); 4165 if (error) 4166 break; 4167#ifdef INET 4168 if (xp->pr_ip4s > 0) { 4169 error = SYSCTL_OUT(req, ip4, 4170 xp->pr_ip4s * sizeof(struct in_addr)); 4171 if (error) 4172 break; 4173 } 4174#endif 4175#ifdef INET6 4176 if (xp->pr_ip6s > 0) { 4177 error = SYSCTL_OUT(req, ip6, 4178 xp->pr_ip6s * sizeof(struct in6_addr)); 4179 if (error) 4180 break; 4181 } 4182#endif 4183 } 4184 sx_sunlock(&allprison_lock); 4185 free(xp, M_TEMP); 4186#ifdef INET 4187 free(ip4, M_TEMP); 4188#endif 4189#ifdef INET6 4190 free(ip6, M_TEMP); 4191#endif 4192 return (error); 4193} 4194 4195SYSCTL_OID(_security_jail, OID_AUTO, list, 4196 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 4197 sysctl_jail_list, "S", "List of active jails"); 4198 4199static int 4200sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 4201{ 4202 int error, injail; 4203 4204 injail = jailed(req->td->td_ucred); 4205 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 4206 4207 return (error); 4208} 4209 4210SYSCTL_PROC(_security_jail, OID_AUTO, jailed, 4211 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 4212 sysctl_jail_jailed, "I", "Process in jail?"); 4213 4214static int 4215sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) 4216{ 4217 int error, havevnet; 4218#ifdef VIMAGE 4219 struct ucred *cred = req->td->td_ucred; 4220 4221 havevnet = jailed(cred) && prison_owns_vnet(cred); 4222#else 4223 havevnet = 0; 4224#endif 4225 error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet)); 4226 4227 return (error); 4228} 4229 4230SYSCTL_PROC(_security_jail, OID_AUTO, vnet, 4231 CTLTYPE_INT | CTLFLAG_RD | 
CTLFLAG_MPSAFE, NULL, 0, 4232 sysctl_jail_vnet, "I", "Jail owns VNET?"); 4233 4234#if defined(INET) || defined(INET6) 4235SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 4236 &jail_max_af_ips, 0, 4237 "Number of IP addresses a jail may have at most per address family"); 4238#endif 4239 4240/* 4241 * Default parameters for jail(2) compatability. For historical reasons, 4242 * the sysctl names have varying similarity to the parameter names. Prisons 4243 * just see their own parameters, and can't change them. 4244 */ 4245static int 4246sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 4247{ 4248 struct prison *pr; 4249 int allow, error, i; 4250 4251 pr = req->td->td_ucred->cr_prison; 4252 allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 4253 4254 /* Get the current flag value, and convert it to a boolean. */ 4255 i = (allow & arg2) ? 1 : 0; 4256 if (arg1 != NULL) 4257 i = !i; 4258 error = sysctl_handle_int(oidp, &i, 0, req); 4259 if (error || !req->newptr) 4260 return (error); 4261 i = i ? arg2 : 0; 4262 if (arg1 != NULL) 4263 i ^= arg2; 4264 /* 4265 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 4266 * for writing. 
4267 */ 4268 mtx_lock(&prison0.pr_mtx); 4269 jail_default_allow = (jail_default_allow & ~arg2) | i; 4270 mtx_unlock(&prison0.pr_mtx); 4271 return (0); 4272} 4273 4274SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 4275 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4276 NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 4277 "Processes in jail can set their hostnames"); 4278SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 4279 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4280 (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 4281 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 4282SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 4283 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4284 NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 4285 "Processes in jail can use System V IPC primitives"); 4286SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 4287 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4288 NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 4289 "Prison root can create raw sockets"); 4290SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 4291 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4292 NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 4293 "Processes in jail can alter system file flags"); 4294SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 4295 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4296 NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 4297 "Processes in jail can mount/unmount jail-friendly file systems"); 4298SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed, 4299 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4300 NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I", 4301 "Processes in jail can mount the devfs file system"); 4302SYSCTL_PROC(_security_jail, OID_AUTO, mount_fdescfs_allowed, 4303 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4304 NULL, PR_ALLOW_MOUNT_FDESCFS, sysctl_jail_default_allow, "I", 4305 "Processes in jail can 
mount the fdescfs file system"); 4306SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed, 4307 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4308 NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I", 4309 "Processes in jail can mount the nullfs file system"); 4310SYSCTL_PROC(_security_jail, OID_AUTO, mount_procfs_allowed, 4311 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4312 NULL, PR_ALLOW_MOUNT_PROCFS, sysctl_jail_default_allow, "I", 4313 "Processes in jail can mount the procfs file system"); 4314SYSCTL_PROC(_security_jail, OID_AUTO, mount_tmpfs_allowed, 4315 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4316 NULL, PR_ALLOW_MOUNT_TMPFS, sysctl_jail_default_allow, "I", 4317 "Processes in jail can mount the tmpfs file system"); 4318SYSCTL_PROC(_security_jail, OID_AUTO, mount_zfs_allowed, 4319 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4320 NULL, PR_ALLOW_MOUNT_ZFS, sysctl_jail_default_allow, "I", 4321 "Processes in jail can mount the zfs file system"); 4322 4323static int 4324sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) 4325{ 4326 struct prison *pr; 4327 int level, error; 4328 4329 pr = req->td->td_ucred->cr_prison; 4330 level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2); 4331 error = sysctl_handle_int(oidp, &level, 0, req); 4332 if (error || !req->newptr) 4333 return (error); 4334 *(int *)arg1 = level; 4335 return (0); 4336} 4337 4338SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, 4339 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4340 &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), 4341 sysctl_jail_default_level, "I", 4342 "Processes in jail cannot see all mounted file systems"); 4343 4344SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset, 4345 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 4346 &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum), 4347 sysctl_jail_default_level, "I", 4348 "Ruleset for the devfs filesystem in jail"); 4349 4350/* 4351 * Nodes to describe jail parameters. 
Maximum length of string parameters 4352 * is returned in the string itself, and the other parameters exist merely 4353 * to make themselves and their types known. 4354 */ 4355SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, 4356 "Jail parameters"); 4357 4358int 4359sysctl_jail_param(SYSCTL_HANDLER_ARGS) 4360{ 4361 int i; 4362 long l; 4363 size_t s; 4364 char numbuf[12]; 4365 4366 switch (oidp->oid_kind & CTLTYPE) 4367 { 4368 case CTLTYPE_LONG: 4369 case CTLTYPE_ULONG: 4370 l = 0; 4371#ifdef SCTL_MASK32 4372 if (!(req->flags & SCTL_MASK32)) 4373#endif 4374 return (SYSCTL_OUT(req, &l, sizeof(l))); 4375 case CTLTYPE_INT: 4376 case CTLTYPE_UINT: 4377 i = 0; 4378 return (SYSCTL_OUT(req, &i, sizeof(i))); 4379 case CTLTYPE_STRING: 4380 snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2); 4381 return 4382 (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); 4383 case CTLTYPE_STRUCT: 4384 s = (size_t)arg2; 4385 return (SYSCTL_OUT(req, &s, sizeof(s))); 4386 } 4387 return (0); 4388} 4389 4390/* 4391 * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at 4392 * jail creation time but cannot be changed in an existing jail. 
4393 */ 4394SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); 4395SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); 4396SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); 4397SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); 4398SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, 4399 "I", "Jail secure level"); 4400SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", 4401 "Jail value for kern.osreldate and uname -K"); 4402SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, 4403 "Jail value for kern.osrelease and uname -r"); 4404SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, 4405 "I", "Jail cannot see all mounted file systems"); 4406SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW, 4407 "I", "Ruleset for in-jail devfs mounts"); 4408SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, 4409 "B", "Jail persistence"); 4410#ifdef VIMAGE 4411SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, 4412 "E,jailsys", "Virtual network stack"); 4413#endif 4414SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, 4415 "B", "Jail is in the process of shutting down"); 4416 4417SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); 4418SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, 4419 "I", "Current number of child jails"); 4420SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, 4421 "I", "Maximum number of child jails"); 4422 4423SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); 4424SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, 4425 "Jail hostname"); 4426SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, 4427 "Jail NIS domainname"); 4428SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, 4429 "Jail host UUID"); 4430SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, 4431 "LU", "Jail host ID"); 4432 
4433SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 4434SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); 4435 4436#ifdef INET 4437SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, 4438 "Jail IPv4 address virtualization"); 4439SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), 4440 "S,in_addr,a", "Jail IPv4 addresses"); 4441SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, 4442 "B", "Do (not) use IPv4 source address selection rather than the " 4443 "primary jail IPv4 address."); 4444#endif 4445#ifdef INET6 4446SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, 4447 "Jail IPv6 address virtualization"); 4448SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), 4449 "S,in6_addr,a", "Jail IPv6 addresses"); 4450SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, 4451 "B", "Do (not) use IPv6 source address selection rather than the " 4452 "primary jail IPv6 address."); 4453#endif 4454 4455SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); 4456SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, 4457 "B", "Jail may set hostname"); 4458SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, 4459 "B", "Jail may use SYSV IPC"); 4460SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, 4461 "B", "Jail may create raw sockets"); 4462SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, 4463 "B", "Jail may alter system file flags"); 4464SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, 4465 "B", "Jail may set file quotas"); 4466SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, 4467 "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); 4468 4469SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); 4470SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, 4471 "B", "Jail may mount/unmount jail-friendly file systems in general"); 4472SYSCTL_JAIL_PARAM(_allow_mount, devfs, CTLTYPE_INT | 
CTLFLAG_RW, 4473 "B", "Jail may mount the devfs file system"); 4474SYSCTL_JAIL_PARAM(_allow_mount, fdescfs, CTLTYPE_INT | CTLFLAG_RW, 4475 "B", "Jail may mount the fdescfs file system"); 4476SYSCTL_JAIL_PARAM(_allow_mount, nullfs, CTLTYPE_INT | CTLFLAG_RW, 4477 "B", "Jail may mount the nullfs file system"); 4478SYSCTL_JAIL_PARAM(_allow_mount, procfs, CTLTYPE_INT | CTLFLAG_RW, 4479 "B", "Jail may mount the procfs file system"); 4480SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW, 4481 "B", "Jail may mount the tmpfs file system"); 4482SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW, 4483 "B", "Jail may mount the zfs file system"); 4484 4485void 4486prison_racct_foreach(void (*callback)(struct racct *racct, 4487 void *arg2, void *arg3), void *arg2, void *arg3) 4488{ 4489 struct prison_racct *prr; 4490 4491 sx_slock(&allprison_lock); 4492 LIST_FOREACH(prr, &allprison_racct, prr_next) 4493 (callback)(prr->prr_racct, arg2, arg3); 4494 sx_sunlock(&allprison_lock); 4495} 4496 4497static struct prison_racct * 4498prison_racct_find_locked(const char *name) 4499{ 4500 struct prison_racct *prr; 4501 4502 sx_assert(&allprison_lock, SA_XLOCKED); 4503 4504 if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN) 4505 return (NULL); 4506 4507 LIST_FOREACH(prr, &allprison_racct, prr_next) { 4508 if (strcmp(name, prr->prr_name) != 0) 4509 continue; 4510 4511 /* Found prison_racct with a matching name? */ 4512 prison_racct_hold(prr); 4513 return (prr); 4514 } 4515 4516 /* Add new prison_racct. 
*/ 4517 prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK); 4518 racct_create(&prr->prr_racct); 4519 4520 strcpy(prr->prr_name, name); 4521 refcount_init(&prr->prr_refcount, 1); 4522 LIST_INSERT_HEAD(&allprison_racct, prr, prr_next); 4523 4524 return (prr); 4525} 4526 4527struct prison_racct * 4528prison_racct_find(const char *name) 4529{ 4530 struct prison_racct *prr; 4531 4532 sx_xlock(&allprison_lock); 4533 prr = prison_racct_find_locked(name); 4534 sx_xunlock(&allprison_lock); 4535 return (prr); 4536} 4537 4538void 4539prison_racct_hold(struct prison_racct *prr) 4540{ 4541 4542 refcount_acquire(&prr->prr_refcount); 4543} 4544 4545static void 4546prison_racct_free_locked(struct prison_racct *prr) 4547{ 4548 4549 sx_assert(&allprison_lock, SA_XLOCKED); 4550 4551 if (refcount_release(&prr->prr_refcount)) { 4552 racct_destroy(&prr->prr_racct); 4553 LIST_REMOVE(prr, prr_next); 4554 free(prr, M_PRISON_RACCT); 4555 } 4556} 4557 4558void 4559prison_racct_free(struct prison_racct *prr) 4560{ 4561 int old; 4562 4563 sx_assert(&allprison_lock, SA_UNLOCKED); 4564 4565 old = prr->prr_refcount; 4566 if (old > 1 && atomic_cmpset_int(&prr->prr_refcount, old, old - 1)) 4567 return; 4568 4569 sx_xlock(&allprison_lock); 4570 prison_racct_free_locked(prr); 4571 sx_xunlock(&allprison_lock); 4572} 4573 4574#ifdef RACCT 4575static void 4576prison_racct_attach(struct prison *pr) 4577{ 4578 struct prison_racct *prr; 4579 4580 sx_assert(&allprison_lock, SA_XLOCKED); 4581 4582 prr = prison_racct_find_locked(pr->pr_name); 4583 KASSERT(prr != NULL, ("cannot find prison_racct")); 4584 4585 pr->pr_prison_racct = prr; 4586} 4587 4588/* 4589 * Handle jail renaming. From the racct point of view, renaming means 4590 * moving from one prison_racct to another. 
4591 */ 4592static void 4593prison_racct_modify(struct prison *pr) 4594{ 4595 struct proc *p; 4596 struct ucred *cred; 4597 struct prison_racct *oldprr; 4598 4599 sx_slock(&allproc_lock); 4600 sx_xlock(&allprison_lock); 4601 4602 if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) { 4603 sx_xunlock(&allprison_lock); 4604 sx_sunlock(&allproc_lock); 4605 return; 4606 } 4607 4608 oldprr = pr->pr_prison_racct; 4609 pr->pr_prison_racct = NULL; 4610 4611 prison_racct_attach(pr); 4612 4613 /* 4614 * Move resource utilisation records. 4615 */ 4616 racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct); 4617 4618 /* 4619 * Force rctl to reattach rules to processes. 4620 */ 4621 FOREACH_PROC_IN_SYSTEM(p) { 4622 PROC_LOCK(p); 4623 cred = crhold(p->p_ucred); 4624 PROC_UNLOCK(p); 4625 racct_proc_ucred_changed(p, cred, cred); 4626 crfree(cred); 4627 } 4628 4629 sx_sunlock(&allproc_lock); 4630 prison_racct_free_locked(oldprr); 4631 sx_xunlock(&allprison_lock); 4632} 4633 4634static void 4635prison_racct_detach(struct prison *pr) 4636{ 4637 4638 sx_assert(&allprison_lock, SA_UNLOCKED); 4639 4640 if (pr->pr_prison_racct == NULL) 4641 return; 4642 prison_racct_free(pr->pr_prison_racct); 4643 pr->pr_prison_racct = NULL; 4644} 4645#endif /* RACCT */ 4646 4647#ifdef DDB 4648 4649static void 4650db_show_prison(struct prison *pr) 4651{ 4652 int fi; 4653#if defined(INET) || defined(INET6) 4654 int ii; 4655#endif 4656 unsigned jsf; 4657#ifdef INET6 4658 char ip6buf[INET6_ADDRSTRLEN]; 4659#endif 4660 4661 db_printf("prison %p:\n", pr); 4662 db_printf(" jid = %d\n", pr->pr_id); 4663 db_printf(" name = %s\n", pr->pr_name); 4664 db_printf(" parent = %p\n", pr->pr_parent); 4665 db_printf(" ref = %d\n", pr->pr_ref); 4666 db_printf(" uref = %d\n", pr->pr_uref); 4667 db_printf(" path = %s\n", pr->pr_path); 4668 db_printf(" cpuset = %d\n", pr->pr_cpuset 4669 ? 
pr->pr_cpuset->cs_id : -1); 4670#ifdef VIMAGE 4671 db_printf(" vnet = %p\n", pr->pr_vnet); 4672#endif 4673 db_printf(" root = %p\n", pr->pr_root); 4674 db_printf(" securelevel = %d\n", pr->pr_securelevel); 4675 db_printf(" devfs_rsnum = %d\n", pr->pr_devfs_rsnum); 4676 db_printf(" children.max = %d\n", pr->pr_childmax); 4677 db_printf(" children.cur = %d\n", pr->pr_childcount); 4678 db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); 4679 db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); 4680 db_printf(" flags = 0x%x", pr->pr_flags); 4681 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 4682 fi++) 4683 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) 4684 db_printf(" %s", pr_flag_names[fi]); 4685 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 4686 fi++) { 4687 jsf = pr->pr_flags & 4688 (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); 4689 db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name, 4690 pr_flag_jailsys[fi].disable && 4691 (jsf == pr_flag_jailsys[fi].disable) ? "disable" 4692 : (jsf == pr_flag_jailsys[fi].new) ? "new" 4693 : "inherit"); 4694 } 4695 db_printf(" allow = 0x%x", pr->pr_allow); 4696 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 4697 fi++) 4698 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) 4699 db_printf(" %s", pr_allow_names[fi]); 4700 db_printf("\n"); 4701 db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); 4702 db_printf(" host.hostname = %s\n", pr->pr_hostname); 4703 db_printf(" host.domainname = %s\n", pr->pr_domainname); 4704 db_printf(" host.hostuuid = %s\n", pr->pr_hostuuid); 4705 db_printf(" host.hostid = %lu\n", pr->pr_hostid); 4706#ifdef INET 4707 db_printf(" ip4s = %d\n", pr->pr_ip4s); 4708 for (ii = 0; ii < pr->pr_ip4s; ii++) 4709 db_printf(" %s %s\n", 4710 ii == 0 ? 
"ip4.addr =" : " ", 4711 inet_ntoa(pr->pr_ip4[ii])); 4712#endif 4713#ifdef INET6 4714 db_printf(" ip6s = %d\n", pr->pr_ip6s); 4715 for (ii = 0; ii < pr->pr_ip6s; ii++) 4716 db_printf(" %s %s\n", 4717 ii == 0 ? "ip6.addr =" : " ", 4718 ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); 4719#endif 4720} 4721 4722DB_SHOW_COMMAND(prison, db_show_prison_command) 4723{ 4724 struct prison *pr; 4725 4726 if (!have_addr) { 4727 /* 4728 * Show all prisons in the list, and prison0 which is not 4729 * listed. 4730 */ 4731 db_show_prison(&prison0); 4732 if (!db_pager_quit) { 4733 TAILQ_FOREACH(pr, &allprison, pr_list) { 4734 db_show_prison(pr); 4735 if (db_pager_quit) 4736 break; 4737 } 4738 } 4739 return; 4740 } 4741 4742 if (addr == 0) 4743 pr = &prison0; 4744 else { 4745 /* Look for a prison with the ID and with references. */ 4746 TAILQ_FOREACH(pr, &allprison, pr_list) 4747 if (pr->pr_id == addr && pr->pr_ref > 0) 4748 break; 4749 if (pr == NULL) 4750 /* Look again, without requiring a reference. */ 4751 TAILQ_FOREACH(pr, &allprison, pr_list) 4752 if (pr->pr_id == addr) 4753 break; 4754 if (pr == NULL) 4755 /* Assume address points to a valid prison. */ 4756 pr = (struct prison *)addr; 4757 } 4758 db_show_prison(pr); 4759} 4760 4761#endif /* DDB */ 4762