kern_jail.c revision 194090
1/*- 2 * Copyright (c) 1999 Poul-Henning Kamp. 3 * Copyright (c) 2008 Bjoern A. Zeeb. 4 * Copyright (c) 2009 James Gritton. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 194090 2009-06-13 00:12:02Z jamie $"); 31 32#include "opt_compat.h" 33#include "opt_ddb.h" 34#include "opt_inet.h" 35#include "opt_inet6.h" 36 37#include <sys/param.h> 38#include <sys/types.h> 39#include <sys/kernel.h> 40#include <sys/systm.h> 41#include <sys/errno.h> 42#include <sys/sysproto.h> 43#include <sys/malloc.h> 44#include <sys/osd.h> 45#include <sys/priv.h> 46#include <sys/proc.h> 47#include <sys/taskqueue.h> 48#include <sys/fcntl.h> 49#include <sys/jail.h> 50#include <sys/lock.h> 51#include <sys/mutex.h> 52#include <sys/sx.h> 53#include <sys/sysent.h> 54#include <sys/namei.h> 55#include <sys/mount.h> 56#include <sys/queue.h> 57#include <sys/socket.h> 58#include <sys/syscallsubr.h> 59#include <sys/sysctl.h> 60#include <sys/vnode.h> 61#include <sys/vimage.h> 62#include <net/if.h> 63#include <netinet/in.h> 64#ifdef DDB 65#include <ddb/ddb.h> 66#ifdef INET6 67#include <netinet6/in6_var.h> 68#endif /* INET6 */ 69#endif /* DDB */ 70 71#include <security/mac/mac_framework.h> 72 73MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 74 75/* prison0 describes what is "real" about the system. */ 76struct prison prison0 = { 77 .pr_id = 0, 78 .pr_name = "0", 79 .pr_ref = 1, 80 .pr_uref = 1, 81 .pr_path = "/", 82 .pr_securelevel = -1, 83 .pr_uuid = "00000000-0000-0000-0000-000000000000", 84 .pr_children = LIST_HEAD_INITIALIZER(&prison0.pr_children), 85 .pr_flags = PR_HOST, 86 .pr_allow = PR_ALLOW_ALL, 87}; 88MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); 89 90/* allprison and lastprid are protected by allprison_lock. */ 91struct sx allprison_lock; 92SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); 93struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); 94int lastprid = 0; 95 96static int do_jail_attach(struct thread *td, struct prison *pr); 97static void prison_complete(void *context, int pending); 98static void prison_deref(struct prison *pr, int flags); 99static char *prison_path(struct prison *pr1, struct prison *pr2); 100static void prison_remove_one(struct prison *pr); 101#ifdef INET 102static int _prison_check_ip4(struct prison *pr, struct in_addr *ia); 103static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); 104#endif 105#ifdef INET6 106static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); 107static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); 108#endif 109 110/* Flags for prison_deref */ 111#define PD_DEREF 0x01 112#define PD_DEUREF 0x02 113#define PD_LOCKED 0x04 114#define PD_LIST_SLOCKED 0x08 115#define PD_LIST_XLOCKED 0x10 116 117/* 118 * Parameter names corresponding to PR_* flag values 119 */ 120static char *pr_flag_names[] = { 121 [0] = "persist", 122 "host", 123#ifdef INET 124 "ip4", 125#endif 126#ifdef INET6 127 [3] = "ip6", 128#endif 129}; 130 131static char *pr_flag_nonames[] = { 132 [0] = "nopersist", 133 "nohost", 134#ifdef INET 135 "noip4", 136#endif 137#ifdef INET6 138 [3] = "noip6", 139#endif 140}; 141 142static char *pr_allow_names[] = { 143 "allow.set_hostname", 144 "allow.sysvipc", 145 "allow.raw_sockets", 146 "allow.chflags", 147 "allow.mount", 148 "allow.quotas", 149 "allow.jails", 150 "allow.socket_af", 151}; 152 153static char *pr_allow_nonames[] = { 154 "allow.noset_hostname", 155 "allow.nosysvipc", 156 "allow.noraw_sockets", 157 "allow.nochflags", 158 "allow.nomount", 159 "allow.noquotas", 160 "allow.nojails", 161 "allow.nosocket_af", 162}; 163 164#define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME 165static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; 166static int jail_default_enforce_statfs = 2; 167#if defined(INET) || defined(INET6) 168static unsigned jail_max_af_ips = 255; 169#endif 170 171#ifdef INET 172static int 173qcmp_v4(const void *ip1, const void *ip2) 174{ 175 in_addr_t iaa, iab; 176 177 /* 178 * We need to compare in HBO here to get the list sorted as expected 179 * by the result of the code. Sorting NBO addresses gives you 180 * interesting results. If you do not understand, do not try. 181 */ 182 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 183 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 184 185 /* 186 * Do not simply return the difference of the two numbers, the int is 187 * not wide enough. 188 */ 189 if (iaa > iab) 190 return (1); 191 else if (iaa < iab) 192 return (-1); 193 else 194 return (0); 195} 196#endif 197 198#ifdef INET6 199static int 200qcmp_v6(const void *ip1, const void *ip2) 201{ 202 const struct in6_addr *ia6a, *ia6b; 203 int i, rc; 204 205 ia6a = (const struct in6_addr *)ip1; 206 ia6b = (const struct in6_addr *)ip2; 207 208 rc = 0; 209 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 210 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 211 rc = 1; 212 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 213 rc = -1; 214 } 215 return (rc); 216} 217#endif 218 219/* 220 * struct jail_args { 221 * struct jail *jail; 222 * }; 223 */ 224int 225jail(struct thread *td, struct jail_args *uap) 226{ 227 uint32_t version; 228 int error; 229 struct jail j; 230 231 error = copyin(uap->jail, &version, sizeof(uint32_t)); 232 if (error) 233 return (error); 234 235 switch (version) { 236 case 0: 237 { 238 struct jail_v0 j0; 239 240 /* FreeBSD single IPv4 jails. */ 241 bzero(&j, sizeof(struct jail)); 242 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 243 if (error) 244 return (error); 245 j.version = j0.version; 246 j.path = j0.path; 247 j.hostname = j0.hostname; 248 j.ip4s = j0.ip_number; 249 break; 250 } 251 252 case 1: 253 /* 254 * Version 1 was used by multi-IPv4 jail implementations 255 * that never made it into the official kernel. 256 */ 257 return (EINVAL); 258 259 case 2: /* JAIL_API_VERSION */ 260 /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 261 error = copyin(uap->jail, &j, sizeof(struct jail)); 262 if (error) 263 return (error); 264 break; 265 266 default: 267 /* Sci-Fi jails are not supported, sorry. */ 268 return (EINVAL); 269 } 270 return (kern_jail(td, &j)); 271} 272 273int 274kern_jail(struct thread *td, struct jail *j) 275{ 276 struct iovec optiov[2 * (4 277 + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) 278#ifdef INET 279 + 1 280#endif 281#ifdef INET6 282 + 1 283#endif 284 )]; 285 struct uio opt; 286 char *u_path, *u_hostname, *u_name; 287#ifdef INET 288 uint32_t ip4s; 289 struct in_addr *u_ip4; 290#endif 291#ifdef INET6 292 struct in6_addr *u_ip6; 293#endif 294 size_t tmplen; 295 int error, enforce_statfs, fi; 296 297 bzero(&optiov, sizeof(optiov)); 298 opt.uio_iov = optiov; 299 opt.uio_iovcnt = 0; 300 opt.uio_offset = -1; 301 opt.uio_resid = -1; 302 opt.uio_segflg = UIO_SYSSPACE; 303 opt.uio_rw = UIO_READ; 304 opt.uio_td = td; 305 306 /* Set permissions for top-level jails from sysctls. */ 307 if (!jailed(td->td_ucred)) { 308 for (fi = 0; fi < sizeof(pr_allow_names) / 309 sizeof(pr_allow_names[0]); fi++) { 310 optiov[opt.uio_iovcnt].iov_base = 311 (jail_default_allow & (1 << fi)) 312 ? pr_allow_names[fi] : pr_allow_nonames[fi]; 313 optiov[opt.uio_iovcnt].iov_len = 314 strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 315 opt.uio_iovcnt += 2; 316 } 317 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 318 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 319 opt.uio_iovcnt++; 320 enforce_statfs = jail_default_enforce_statfs; 321 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 322 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 323 opt.uio_iovcnt++; 324 } 325 326 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 327#ifdef INET 328 ip4s = (j->version == 0) ? 1 : j->ip4s; 329 if (ip4s > jail_max_af_ips) 330 return (EINVAL); 331 tmplen += ip4s * sizeof(struct in_addr); 332#else 333 if (j->ip4s > 0) 334 return (EINVAL); 335#endif 336#ifdef INET6 337 if (j->ip6s > jail_max_af_ips) 338 return (EINVAL); 339 tmplen += j->ip6s * sizeof(struct in6_addr); 340#else 341 if (j->ip6s > 0) 342 return (EINVAL); 343#endif 344 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 345 u_hostname = u_path + MAXPATHLEN; 346 u_name = u_hostname + MAXHOSTNAMELEN; 347#ifdef INET 348 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 349#endif 350#ifdef INET6 351#ifdef INET 352 u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 353#else 354 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 355#endif 356#endif 357 optiov[opt.uio_iovcnt].iov_base = "path"; 358 optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 359 opt.uio_iovcnt++; 360 optiov[opt.uio_iovcnt].iov_base = u_path; 361 error = copyinstr(j->path, u_path, MAXPATHLEN, 362 &optiov[opt.uio_iovcnt].iov_len); 363 if (error) { 364 free(u_path, M_TEMP); 365 return (error); 366 } 367 opt.uio_iovcnt++; 368 optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 369 optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 370 opt.uio_iovcnt++; 371 optiov[opt.uio_iovcnt].iov_base = u_hostname; 372 error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 373 &optiov[opt.uio_iovcnt].iov_len); 374 if (error) { 375 free(u_path, M_TEMP); 376 return (error); 377 } 378 opt.uio_iovcnt++; 379 if (j->jailname != NULL) { 380 optiov[opt.uio_iovcnt].iov_base = "name"; 381 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 382 opt.uio_iovcnt++; 383 optiov[opt.uio_iovcnt].iov_base = u_name; 384 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 385 &optiov[opt.uio_iovcnt].iov_len); 386 if (error) { 387 free(u_path, M_TEMP); 388 return (error); 389 } 390 opt.uio_iovcnt++; 391 } 392#ifdef INET 393 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 394 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 395 opt.uio_iovcnt++; 396 optiov[opt.uio_iovcnt].iov_base = u_ip4; 397 optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 398 if (j->version == 0) 399 u_ip4->s_addr = j->ip4s; 400 else { 401 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 402 if (error) { 403 free(u_path, M_TEMP); 404 return (error); 405 } 406 } 407 opt.uio_iovcnt++; 408#endif 409#ifdef INET6 410 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 411 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 412 opt.uio_iovcnt++; 413 optiov[opt.uio_iovcnt].iov_base = u_ip6; 414 optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 415 error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 416 if (error) { 417 free(u_path, M_TEMP); 418 return (error); 419 } 420 opt.uio_iovcnt++; 421#endif 422 KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 423 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 424 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 425 free(u_path, M_TEMP); 426 return (error); 427} 428 429 430/* 431 * struct jail_set_args { 432 * struct iovec *iovp; 433 * unsigned int iovcnt; 434 * int flags; 435 * }; 436 */ 437int 438jail_set(struct thread *td, struct jail_set_args *uap) 439{ 440 struct uio *auio; 441 int error; 442 443 /* Check that we have an even number of iovecs. */ 444 if (uap->iovcnt & 1) 445 return (EINVAL); 446 447 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 448 if (error) 449 return (error); 450 error = kern_jail_set(td, auio, uap->flags); 451 free(auio, M_IOV); 452 return (error); 453} 454 455int 456kern_jail_set(struct thread *td, struct uio *optuio, int flags) 457{ 458 struct nameidata nd; 459#ifdef INET 460 struct in_addr *ip4; 461#endif 462#ifdef INET6 463 struct in6_addr *ip6; 464#endif 465 struct vfsopt *opt; 466 struct vfsoptlist *opts; 467 struct prison *pr, *deadpr, *mypr, *ppr, *tpr; 468 struct vnode *root; 469 char *domain, *errmsg, *host, *name, *p, *path, *uuid; 470#if defined(INET) || defined(INET6) 471 void *op; 472#endif 473 unsigned long hid; 474 size_t namelen, onamelen; 475 int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 476 int gotenforce, gothid, gotslevel, fi, jid, len; 477 int slevel, vfslocked; 478#if defined(INET) || defined(INET6) 479 int ii, ij; 480#endif 481#ifdef INET 482 int ip4s, ip4a, redo_ip4; 483#endif 484#ifdef INET6 485 int ip6s, ip6a, redo_ip6; 486#endif 487 unsigned pr_flags, ch_flags; 488 unsigned pr_allow, ch_allow, tallow; 489 char numbuf[12]; 490 491 error = priv_check(td, PRIV_JAIL_SET); 492 if (!error && (flags & JAIL_ATTACH)) 493 error = priv_check(td, PRIV_JAIL_ATTACH); 494 if (error) 495 return (error); 496 mypr = ppr = td->td_ucred->cr_prison; 497 if ((flags & JAIL_CREATE) && !(mypr->pr_allow & PR_ALLOW_JAILS)) 498 return (EPERM); 499 if (flags & ~JAIL_SET_MASK) 500 return (EINVAL); 501 502 /* 503 * Check all the parameters before committing to anything. Not all 504 * errors can be caught early, but we may as well try. Also, this 505 * takes care of some expensive stuff (path lookup) before getting 506 * the allprison lock. 507 * 508 * XXX Jails are not filesystems, and jail parameters are not mount 509 * options. But it makes more sense to re-use the vfsopt code 510 * than duplicate it under a different name. 511 */ 512 error = vfs_buildopts(optuio, &opts); 513 if (error) 514 return (error); 515#ifdef INET 516 ip4a = 0; 517 ip4 = NULL; 518#endif 519#ifdef INET6 520 ip6a = 0; 521 ip6 = NULL; 522#endif 523 524#if defined(INET) || defined(INET6) 525 again: 526#endif 527 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 528 if (error == ENOENT) 529 jid = 0; 530 else if (error != 0) 531 goto done_free; 532 533 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 534 if (error == ENOENT) 535 gotslevel = 0; 536 else if (error != 0) 537 goto done_free; 538 else 539 gotslevel = 1; 540 541 error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 542 gotenforce = (error == 0); 543 if (gotenforce) { 544 if (enforce < 0 || enforce > 2) 545 return (EINVAL); 546 } else if (error != ENOENT) 547 goto done_free; 548 549 pr_flags = ch_flags = 0; 550 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 551 fi++) { 552 if (pr_flag_names[fi] == NULL) 553 continue; 554 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 555 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 556 } 557 ch_flags |= pr_flags; 558 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 559 && !(pr_flags & PR_PERSIST)) { 560 error = EINVAL; 561 vfs_opterror(opts, "new jail must persist or attach"); 562 goto done_errmsg; 563 } 564 565 pr_allow = ch_allow = 0; 566 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 567 fi++) { 568 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 569 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 570 } 571 ch_allow |= pr_allow; 572 573 error = vfs_getopt(opts, "name", (void **)&name, &len); 574 if (error == ENOENT) 575 name = NULL; 576 else if (error != 0) 577 goto done_free; 578 else { 579 if (len == 0 || name[len - 1] != '\0') { 580 error = EINVAL; 581 goto done_free; 582 } 583 if (len > MAXHOSTNAMELEN) { 584 error = ENAMETOOLONG; 585 goto done_free; 586 } 587 } 588 589 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 590 if (error == ENOENT) 591 host = NULL; 592 else if (error != 0) 593 goto done_free; 594 else { 595 ch_flags |= PR_HOST; 596 pr_flags |= PR_HOST; 597 if (len == 0 || host[len - 1] != '\0') { 598 error = EINVAL; 599 goto done_free; 600 } 601 if (len > MAXHOSTNAMELEN) { 602 error = ENAMETOOLONG; 603 goto done_free; 604 } 605 } 606 607 error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 608 if (error == ENOENT) 609 domain = NULL; 610 else if (error != 0) 611 goto done_free; 612 else { 613 ch_flags |= PR_HOST; 614 pr_flags |= PR_HOST; 615 if (len == 0 || domain[len - 1] != '\0') { 616 error = EINVAL; 617 goto done_free; 618 } 619 if (len > MAXHOSTNAMELEN) { 620 error = ENAMETOOLONG; 621 goto done_free; 622 } 623 } 624 625 error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 626 if (error == ENOENT) 627 uuid = NULL; 628 else if (error != 0) 629 goto done_free; 630 else { 631 ch_flags |= PR_HOST; 632 pr_flags |= PR_HOST; 633 if (len == 0 || uuid[len - 1] != '\0') { 634 error = EINVAL; 635 goto done_free; 636 } 637 if (len > HOSTUUIDLEN) { 638 error = ENAMETOOLONG; 639 goto done_free; 640 } 641 } 642 643#ifdef COMPAT_IA32 644 if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 645 uint32_t hid32; 646 647 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 648 hid = hid32; 649 } else 650#endif 651 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 652 if (error == ENOENT) 653 gothid = 0; 654 else if (error != 0) 655 goto done_free; 656 else { 657 gothid = 1; 658 ch_flags |= PR_HOST; 659 pr_flags |= PR_HOST; 660 } 661 662 /* This might be the second time around for this option. */ 663#ifdef INET 664 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 665 if (error == ENOENT) 666 ip4s = -1; 667 else if (error != 0) 668 goto done_free; 669 else if (ip4s & (sizeof(*ip4) - 1)) { 670 error = EINVAL; 671 goto done_free; 672 } else { 673 ch_flags |= PR_IP4_USER; 674 pr_flags |= PR_IP4_USER; 675 if (ip4s > 0) { 676 ip4s /= sizeof(*ip4); 677 if (ip4s > jail_max_af_ips) { 678 error = EINVAL; 679 vfs_opterror(opts, "too many IPv4 addresses"); 680 goto done_errmsg; 681 } 682 if (ip4a < ip4s) { 683 ip4a = ip4s; 684 free(ip4, M_PRISON); 685 ip4 = NULL; 686 } 687 if (ip4 == NULL) 688 ip4 = malloc(ip4a * sizeof(*ip4), M_PRISON, 689 M_WAITOK); 690 bcopy(op, ip4, ip4s * sizeof(*ip4)); 691 /* 692 * IP addresses are all sorted but ip[0] to preserve 693 * the primary IP address as given from userland. 694 * This special IP is used for unbound outgoing 695 * connections as well for "loopback" traffic. 696 */ 697 if (ip4s > 1) 698 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 699 /* 700 * Check for duplicate addresses and do some simple 701 * zero and broadcast checks. If users give other bogus 702 * addresses it is their problem. 703 * 704 * We do not have to care about byte order for these 705 * checks so we will do them in NBO. 706 */ 707 for (ii = 0; ii < ip4s; ii++) { 708 if (ip4[ii].s_addr == INADDR_ANY || 709 ip4[ii].s_addr == INADDR_BROADCAST) { 710 error = EINVAL; 711 goto done_free; 712 } 713 if ((ii+1) < ip4s && 714 (ip4[0].s_addr == ip4[ii+1].s_addr || 715 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 716 error = EINVAL; 717 goto done_free; 718 } 719 } 720 } 721 } 722#endif 723 724#ifdef INET6 725 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 726 if (error == ENOENT) 727 ip6s = -1; 728 else if (error != 0) 729 goto done_free; 730 else if (ip6s & (sizeof(*ip6) - 1)) { 731 error = EINVAL; 732 goto done_free; 733 } else { 734 ch_flags |= PR_IP6_USER; 735 pr_flags |= PR_IP6_USER; 736 if (ip6s > 0) { 737 ip6s /= sizeof(*ip6); 738 if (ip6s > jail_max_af_ips) { 739 error = EINVAL; 740 vfs_opterror(opts, "too many IPv6 addresses"); 741 goto done_errmsg; 742 } 743 if (ip6a < ip6s) { 744 ip6a = ip6s; 745 free(ip6, M_PRISON); 746 ip6 = NULL; 747 } 748 if (ip6 == NULL) 749 ip6 = malloc(ip6a * sizeof(*ip6), M_PRISON, 750 M_WAITOK); 751 bcopy(op, ip6, ip6s * sizeof(*ip6)); 752 if (ip6s > 1) 753 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 754 for (ii = 0; ii < ip6s; ii++) { 755 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 756 error = EINVAL; 757 goto done_free; 758 } 759 if ((ii+1) < ip6s && 760 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 761 IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 762 { 763 error = EINVAL; 764 goto done_free; 765 } 766 } 767 } 768 } 769#endif 770 771 root = NULL; 772 error = vfs_getopt(opts, "path", (void **)&path, &len); 773 if (error == ENOENT) 774 path = NULL; 775 else if (error != 0) 776 goto done_free; 777 else { 778 if (flags & JAIL_UPDATE) { 779 error = EINVAL; 780 vfs_opterror(opts, 781 "path cannot be changed after creation"); 782 goto done_errmsg; 783 } 784 if (len == 0 || path[len - 1] != '\0') { 785 error = EINVAL; 786 goto done_free; 787 } 788 if (len < 2 || (len == 2 && path[0] == '/')) 789 path = NULL; 790 else { 791 /* Leave room for a real-root full pathname. */ 792 if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 793 ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 794 error = ENAMETOOLONG; 795 goto done_free; 796 } 797 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE, 798 path, td); 799 error = namei(&nd); 800 if (error) 801 goto done_free; 802 vfslocked = NDHASGIANT(&nd); 803 root = nd.ni_vp; 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 if (root->v_type != VDIR) { 806 error = ENOTDIR; 807 vrele(root); 808 VFS_UNLOCK_GIANT(vfslocked); 809 goto done_free; 810 } 811 VFS_UNLOCK_GIANT(vfslocked); 812 } 813 } 814 815 /* 816 * Grab the allprison lock before letting modules check their 817 * parameters. Once we have it, do not let go so we'll have a 818 * consistent view of the OSD list. 819 */ 820 sx_xlock(&allprison_lock); 821 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 822 if (error) 823 goto done_unlock_list; 824 825 /* By now, all parameters should have been noted. */ 826 TAILQ_FOREACH(opt, opts, link) { 827 if (!opt->seen && strcmp(opt->name, "errmsg")) { 828 error = EINVAL; 829 vfs_opterror(opts, "unknown parameter: %s", opt->name); 830 goto done_unlock_list; 831 } 832 } 833 834 /* 835 * See if we are creating a new record or updating an existing one. 836 * This abuses the file error codes ENOENT and EEXIST. 837 */ 838 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 839 if (!cuflags) { 840 error = EINVAL; 841 vfs_opterror(opts, "no valid operation (create or update)"); 842 goto done_unlock_list; 843 } 844 pr = NULL; 845 if (jid != 0) { 846 /* 847 * See if a requested jid already exists. There is an 848 * information leak here if the jid exists but is not within 849 * the caller's jail hierarchy. Jail creators will get EEXIST 850 * even though they cannot see the jail, and CREATE | UPDATE 851 * will return ENOENT which is not normally a valid error. 852 */ 853 if (jid < 0) { 854 error = EINVAL; 855 vfs_opterror(opts, "negative jid"); 856 goto done_unlock_list; 857 } 858 pr = prison_find(jid); 859 if (pr != NULL) { 860 ppr = pr->pr_parent; 861 /* Create: jid must not exist. */ 862 if (cuflags == JAIL_CREATE) { 863 mtx_unlock(&pr->pr_mtx); 864 error = EEXIST; 865 vfs_opterror(opts, "jail %d already exists", 866 jid); 867 goto done_unlock_list; 868 } 869 if (!prison_ischild(mypr, pr)) { 870 mtx_unlock(&pr->pr_mtx); 871 pr = NULL; 872 } else if (pr->pr_uref == 0) { 873 if (!(flags & JAIL_DYING)) { 874 mtx_unlock(&pr->pr_mtx); 875 error = ENOENT; 876 vfs_opterror(opts, "jail %d is dying", 877 jid); 878 goto done_unlock_list; 879 } else if ((flags & JAIL_ATTACH) || 880 (pr_flags & PR_PERSIST)) { 881 /* 882 * A dying jail might be resurrected 883 * (via attach or persist), but first 884 * it must determine if another jail 885 * has claimed its name. Accomplish 886 * this by implicitly re-setting the 887 * name. 888 */ 889 if (name == NULL) 890 name = prison_name(mypr, pr); 891 } 892 } 893 } 894 if (pr == NULL) { 895 /* Update: jid must exist. */ 896 if (cuflags == JAIL_UPDATE) { 897 error = ENOENT; 898 vfs_opterror(opts, "jail %d not found", jid); 899 goto done_unlock_list; 900 } 901 } 902 } 903 /* 904 * If the caller provided a name, look for a jail by that name. 905 * This has different semantics for creates and updates keyed by jid 906 * (where the name must not already exist in a different jail), 907 * and updates keyed by the name itself (where the name must exist 908 * because that is the jail being updated). 909 */ 910 if (name != NULL) { 911 p = strrchr(name, '.'); 912 if (p != NULL) { 913 /* 914 * This is a hierarchical name. Split it into the 915 * parent and child names, and make sure the parent 916 * exists or matches an already found jail. 917 */ 918 *p = '\0'; 919 if (pr != NULL) { 920 if (strncmp(name, ppr->pr_name, p - name) || 921 ppr->pr_name[p - name] != '\0') { 922 mtx_unlock(&pr->pr_mtx); 923 error = EINVAL; 924 vfs_opterror(opts, 925 "cannot change jail's parent"); 926 goto done_unlock_list; 927 } 928 } else { 929 ppr = prison_find_name(mypr, name); 930 if (ppr == NULL) { 931 error = ENOENT; 932 vfs_opterror(opts, 933 "jail \"%s\" not found", name); 934 goto done_unlock_list; 935 } 936 mtx_unlock(&ppr->pr_mtx); 937 } 938 name = p + 1; 939 } 940 if (name[0] != '\0') { 941 namelen = 942 (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 943 name_again: 944 deadpr = NULL; 945 FOREACH_PRISON_CHILD(ppr, tpr) { 946 if (tpr != pr && tpr->pr_ref > 0 && 947 !strcmp(tpr->pr_name + namelen, name)) { 948 if (pr == NULL && 949 cuflags != JAIL_CREATE) { 950 mtx_lock(&tpr->pr_mtx); 951 if (tpr->pr_ref > 0) { 952 /* 953 * Use this jail 954 * for updates. 955 */ 956 if (tpr->pr_uref > 0) { 957 pr = tpr; 958 break; 959 } 960 deadpr = tpr; 961 } 962 mtx_unlock(&tpr->pr_mtx); 963 } else if (tpr->pr_uref > 0) { 964 /* 965 * Create, or update(jid): 966 * name must not exist in an 967 * active sibling jail. 968 */ 969 error = EEXIST; 970 if (pr != NULL) 971 mtx_unlock(&pr->pr_mtx); 972 vfs_opterror(opts, 973 "jail \"%s\" already exists", 974 name); 975 goto done_unlock_list; 976 } 977 } 978 } 979 /* If no active jail is found, use a dying one. */ 980 if (deadpr != NULL && pr == NULL) { 981 if (flags & JAIL_DYING) { 982 mtx_lock(&deadpr->pr_mtx); 983 if (deadpr->pr_ref == 0) { 984 mtx_unlock(&deadpr->pr_mtx); 985 goto name_again; 986 } 987 pr = deadpr; 988 } else if (cuflags == JAIL_UPDATE) { 989 error = ENOENT; 990 vfs_opterror(opts, 991 "jail \"%s\" is dying", name); 992 goto done_unlock_list; 993 } 994 } 995 /* Update: name must exist if no jid. */ 996 else if (cuflags == JAIL_UPDATE && pr == NULL) { 997 error = ENOENT; 998 vfs_opterror(opts, "jail \"%s\" not found", 999 name); 1000 goto done_unlock_list; 1001 } 1002 } 1003 } 1004 /* Update: must provide a jid or name. */ 1005 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1006 error = ENOENT; 1007 vfs_opterror(opts, "update specified no jail"); 1008 goto done_unlock_list; 1009 } 1010 1011 /* If there's no prison to update, create a new one and link it in. */ 1012 if (pr == NULL) { 1013 created = 1; 1014 mtx_lock(&ppr->pr_mtx); 1015 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1016 mtx_unlock(&ppr->pr_mtx); 1017 error = ENOENT; 1018 vfs_opterror(opts, "parent jail went away!"); 1019 goto done_unlock_list; 1020 } 1021 ppr->pr_ref++; 1022 ppr->pr_uref++; 1023 mtx_unlock(&ppr->pr_mtx); 1024 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1025 if (jid == 0) { 1026 /* Find the next free jid. */ 1027 jid = lastprid + 1; 1028 findnext: 1029 if (jid == JAIL_MAX) 1030 jid = 1; 1031 TAILQ_FOREACH(tpr, &allprison, pr_list) { 1032 if (tpr->pr_id < jid) 1033 continue; 1034 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1035 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1036 break; 1037 } 1038 if (jid == lastprid) { 1039 error = EAGAIN; 1040 vfs_opterror(opts, 1041 "no available jail IDs"); 1042 free(pr, M_PRISON); 1043 prison_deref(ppr, PD_DEREF | 1044 PD_DEUREF | PD_LIST_XLOCKED); 1045 goto done_releroot; 1046 } 1047 jid++; 1048 goto findnext; 1049 } 1050 lastprid = jid; 1051 } else { 1052 /* 1053 * The jail already has a jid (that did not yet exist), 1054 * so just find where to insert it. 1055 */ 1056 TAILQ_FOREACH(tpr, &allprison, pr_list) 1057 if (tpr->pr_id >= jid) { 1058 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1059 break; 1060 } 1061 } 1062 if (tpr == NULL) 1063 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1064 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1065 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1066 tpr->pr_prisoncount++; 1067 1068 pr->pr_parent = ppr; 1069 pr->pr_id = jid; 1070 1071 /* Set some default values, and inherit some from the parent. */ 1072 if (name == NULL) 1073 name = ""; 1074 if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1075 if (host == NULL) 1076 host = ppr->pr_host; 1077 if (domain == NULL) 1078 domain = ppr->pr_domain; 1079 if (uuid == NULL) 1080 uuid = ppr->pr_uuid; 1081 if (!gothid) 1082 hid = ppr->pr_hostid; 1083 } 1084 if (path == NULL) { 1085 path = "/"; 1086 root = mypr->pr_root; 1087 vref(root); 1088 } 1089#ifdef INET 1090 pr->pr_flags |= ppr->pr_flags & PR_IP4; 1091 pr->pr_ip4s = ppr->pr_ip4s; 1092 if (ppr->pr_ip4 != NULL) { 1093 pr->pr_ip4 = malloc(pr->pr_ip4s * 1094 sizeof(struct in_addr), M_PRISON, M_WAITOK); 1095 bcopy(ppr->pr_ip4, pr->pr_ip4, 1096 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1097 } 1098#endif 1099#ifdef INET6 1100 pr->pr_flags |= ppr->pr_flags & PR_IP6; 1101 pr->pr_ip6s = ppr->pr_ip6s; 1102 if (ppr->pr_ip6 != NULL) { 1103 pr->pr_ip6 = malloc(pr->pr_ip6s * 1104 sizeof(struct in6_addr), M_PRISON, M_WAITOK); 1105 bcopy(ppr->pr_ip6, pr->pr_ip6, 1106 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1107 } 1108#endif 1109 pr->pr_securelevel = ppr->pr_securelevel; 1110 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1111 pr->pr_enforce_statfs = ppr->pr_enforce_statfs; 1112 1113 LIST_INIT(&pr->pr_children); 1114 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1115 1116 /* 1117 * Allocate a dedicated cpuset for each jail. 1118 * Unlike other initial settings, this may return an erorr. 1119 */ 1120 error = cpuset_create_root(ppr, &pr->pr_cpuset); 1121 if (error) { 1122 prison_deref(pr, PD_LIST_XLOCKED); 1123 goto done_releroot; 1124 } 1125 1126 mtx_lock(&pr->pr_mtx); 1127 /* 1128 * New prisons do not yet have a reference, because we do not 1129 * want other to see the incomplete prison once the 1130 * allprison_lock is downgraded. 1131 */ 1132 } else { 1133 created = 0; 1134 /* 1135 * Grab a reference for existing prisons, to ensure they 1136 * continue to exist for the duration of the call. 1137 */ 1138 pr->pr_ref++; 1139 } 1140 1141 /* Do final error checking before setting anything. */ 1142 if (gotslevel) { 1143 if (slevel < ppr->pr_securelevel) { 1144 error = EPERM; 1145 goto done_deref_locked; 1146 } 1147 } 1148 if (gotenforce) { 1149 if (enforce < ppr->pr_enforce_statfs) { 1150 error = EPERM; 1151 goto done_deref_locked; 1152 } 1153 } 1154#ifdef INET 1155 if (ch_flags & PR_IP4_USER) { 1156 if (ppr->pr_flags & PR_IP4) { 1157 if (!(pr_flags & PR_IP4_USER)) { 1158 /* 1159 * Silently ignore attempts to make the IP 1160 * addresses unrestricted when the parent is 1161 * restricted; in other words, interpret 1162 * "unrestricted" as "as unrestricted as 1163 * possible". 1164 */ 1165 ip4s = ppr->pr_ip4s; 1166 if (ip4s == 0) { 1167 free(ip4, M_PRISON); 1168 ip4 = NULL; 1169 } else if (ip4s <= ip4a) { 1170 /* Inherit the parent's address(es). */ 1171 bcopy(ppr->pr_ip4, ip4, 1172 ip4s * sizeof(*ip4)); 1173 } else { 1174 /* 1175 * There's no room for the parent's 1176 * address list. Allocate some more. 1177 */ 1178 ip4a = ip4s; 1179 free(ip4, M_PRISON); 1180 ip4 = malloc(ip4a * sizeof(*ip4), 1181 M_PRISON, M_NOWAIT); 1182 if (ip4 != NULL) 1183 bcopy(ppr->pr_ip4, ip4, 1184 ip4s * sizeof(*ip4)); 1185 else { 1186 /* Allocation failed without 1187 * sleeping. Unlocking the 1188 * prison now will invalidate 1189 * some checks and prematurely 1190 * show an unfinished new jail. 1191 * So let go of everything and 1192 * start over. 1193 */ 1194 prison_deref(pr, created 1195 ? PD_LOCKED | 1196 PD_LIST_XLOCKED 1197 : PD_DEREF | PD_LOCKED | 1198 PD_LIST_XLOCKED); 1199 if (root != NULL) { 1200 vfslocked = 1201 VFS_LOCK_GIANT( 1202 root->v_mount); 1203 vrele(root); 1204 VFS_UNLOCK_GIANT( 1205 vfslocked); 1206 } 1207 ip4 = malloc(ip4a * 1208 sizeof(*ip4), M_PRISON, 1209 M_WAITOK); 1210 goto again; 1211 } 1212 } 1213 } else if (ip4s > 0) { 1214 /* 1215 * Make sure the new set of IP addresses is a 1216 * subset of the parent's list. Don't worry 1217 * about the parent being unlocked, as any 1218 * setting is done with allprison_lock held. 1219 */ 1220 for (ij = 0; ij < ppr->pr_ip4s; ij++) 1221 if (ip4[0].s_addr == 1222 ppr->pr_ip4[ij].s_addr) 1223 break; 1224 if (ij == ppr->pr_ip4s) { 1225 error = EPERM; 1226 goto done_deref_locked; 1227 } 1228 if (ip4s > 1) { 1229 for (ii = ij = 1; ii < ip4s; ii++) { 1230 if (ip4[ii].s_addr == 1231 ppr->pr_ip4[0].s_addr) 1232 continue; 1233 for (; ij < ppr->pr_ip4s; ij++) 1234 if (ip4[ii].s_addr == 1235 ppr->pr_ip4[ij].s_addr) 1236 break; 1237 if (ij == ppr->pr_ip4s) 1238 break; 1239 } 1240 if (ij == ppr->pr_ip4s) { 1241 error = EPERM; 1242 goto done_deref_locked; 1243 } 1244 } 1245 } 1246 } 1247 if (ip4s > 0) { 1248 /* 1249 * Check for conflicting IP addresses. We permit them 1250 * if there is no more than one IP on each jail. If 1251 * there is a duplicate on a jail with more than one 1252 * IP stop checking and return error. 1253 */ 1254 FOREACH_PRISON_DESCENDANT(&prison0, tpr, descend) { 1255 if (tpr == pr || tpr->pr_uref == 0) { 1256 descend = 0; 1257 continue; 1258 } 1259 if (!(tpr->pr_flags & PR_IP4_USER)) 1260 continue; 1261 descend = 0; 1262 if (tpr->pr_ip4 == NULL || 1263 (ip4s == 1 && tpr->pr_ip4s == 1)) 1264 continue; 1265 for (ii = 0; ii < ip4s; ii++) { 1266 if (_prison_check_ip4(tpr, 1267 &ip4[ii]) == 0) { 1268 error = EADDRINUSE; 1269 vfs_opterror(opts, 1270 "IPv4 addresses clash"); 1271 goto done_deref_locked; 1272 } 1273 } 1274 } 1275 } 1276 } 1277#endif 1278#ifdef INET6 1279 if (ch_flags & PR_IP6_USER) { 1280 if (ppr->pr_flags & PR_IP6) { 1281 if (!(pr_flags & PR_IP6_USER)) { 1282 /* 1283 * Silently ignore attempts to make the IP 1284 * addresses unrestricted when the parent is 1285 * restricted. 1286 */ 1287 ip6s = ppr->pr_ip6s; 1288 if (ip6s == 0) { 1289 free(ip6, M_PRISON); 1290 ip6 = NULL; 1291 } else if (ip6s <= ip6a) { 1292 /* Inherit the parent's address(es). */ 1293 bcopy(ppr->pr_ip6, ip6, 1294 ip6s * sizeof(*ip6)); 1295 } else { 1296 /* 1297 * There's no room for the parent's 1298 * address list. 1299 */ 1300 ip6a = ip6s; 1301 free(ip6, M_PRISON); 1302 ip6 = malloc(ip6a * sizeof(*ip6), 1303 M_PRISON, M_NOWAIT); 1304 if (ip6 != NULL) 1305 bcopy(ppr->pr_ip6, ip6, 1306 ip6s * sizeof(*ip6)); 1307 else { 1308 prison_deref(pr, created 1309 ? PD_LOCKED | 1310 PD_LIST_XLOCKED 1311 : PD_DEREF | PD_LOCKED | 1312 PD_LIST_XLOCKED); 1313 if (root != NULL) { 1314 vfslocked = 1315 VFS_LOCK_GIANT( 1316 root->v_mount); 1317 vrele(root); 1318 VFS_UNLOCK_GIANT( 1319 vfslocked); 1320 } 1321 ip6 = malloc(ip6a * 1322 sizeof(*ip6), M_PRISON, 1323 M_WAITOK); 1324 goto again; 1325 } 1326 } 1327 } else if (ip6s > 0) { 1328 /* 1329 * Make sure the new set of IP addresses is a 1330 * subset of the parent's list. 1331 */ 1332 for (ij = 0; ij < ppr->pr_ip6s; ij++) 1333 if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1334 &ppr->pr_ip6[ij])) 1335 break; 1336 if (ij == ppr->pr_ip6s) { 1337 error = EPERM; 1338 goto done_deref_locked; 1339 } 1340 if (ip6s > 1) { 1341 for (ii = ij = 1; ii < ip6s; ii++) { 1342 if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1343 &ppr->pr_ip6[0])) 1344 continue; 1345 for (; ij < ppr->pr_ip6s; ij++) 1346 if (IN6_ARE_ADDR_EQUAL( 1347 &ip6[ii], 1348 &ppr->pr_ip6[ij])) 1349 break; 1350 if (ij == ppr->pr_ip6s) 1351 break; 1352 } 1353 if (ij == ppr->pr_ip6s) { 1354 error = EPERM; 1355 goto done_deref_locked; 1356 } 1357 } 1358 } 1359 } 1360 if (ip6s > 0) { 1361 /* Check for conflicting IP addresses. */ 1362 FOREACH_PRISON_DESCENDANT(&prison0, tpr, descend) { 1363 if (tpr == pr || tpr->pr_uref == 0) { 1364 descend = 0; 1365 continue; 1366 } 1367 if (!(tpr->pr_flags & PR_IP6_USER)) 1368 continue; 1369 descend = 0; 1370 if (tpr->pr_ip6 == NULL || 1371 (ip6s == 1 && tpr->pr_ip6s == 1)) 1372 continue; 1373 for (ii = 0; ii < ip6s; ii++) { 1374 if (_prison_check_ip6(tpr, 1375 &ip6[ii]) == 0) { 1376 error = EADDRINUSE; 1377 vfs_opterror(opts, 1378 "IPv6 addresses clash"); 1379 goto done_deref_locked; 1380 } 1381 } 1382 } 1383 } 1384 } 1385#endif 1386 onamelen = namelen = 0; 1387 if (name != NULL) { 1388 /* Give a default name of the jid. */ 1389 if (name[0] == '\0') 1390 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1391 else if (strtoul(name, &p, 10) != jid && *p == '\0') { 1392 error = EINVAL; 1393 vfs_opterror(opts, "name cannot be numeric"); 1394 goto done_deref_locked; 1395 } 1396 /* 1397 * Make sure the name isn't too long for the prison or its 1398 * children. 1399 */ 1400 onamelen = strlen(pr->pr_name); 1401 namelen = strlen(name); 1402 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1403 error = ENAMETOOLONG; 1404 goto done_deref_locked; 1405 } 1406 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1407 if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1408 sizeof(pr->pr_name)) { 1409 error = ENAMETOOLONG; 1410 goto done_deref_locked; 1411 } 1412 } 1413 } 1414 if (pr_allow & ~ppr->pr_allow) { 1415 error = EPERM; 1416 goto done_deref_locked; 1417 } 1418 1419 /* Set the parameters of the prison. */ 1420#ifdef INET 1421 redo_ip4 = 0; 1422 if (ch_flags & PR_IP4_USER) { 1423 if (pr_flags & PR_IP4_USER) { 1424 /* Some restriction set. */ 1425 pr->pr_flags |= PR_IP4; 1426 if (ip4s >= 0) { 1427 free(pr->pr_ip4, M_PRISON); 1428 pr->pr_ip4s = ip4s; 1429 pr->pr_ip4 = ip4; 1430 ip4 = NULL; 1431 } 1432 } else if (ppr->pr_flags & PR_IP4) { 1433 /* This restriction cleared, but keep inherited. */ 1434 free(pr->pr_ip4, M_PRISON); 1435 pr->pr_ip4s = ip4s; 1436 pr->pr_ip4 = ip4; 1437 ip4 = NULL; 1438 } else { 1439 /* Restriction cleared, now unrestricted. */ 1440 pr->pr_flags &= ~PR_IP4; 1441 free(pr->pr_ip4, M_PRISON); 1442 pr->pr_ip4s = 0; 1443 } 1444 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1445 if (prison_restrict_ip4(tpr, NULL)) { 1446 redo_ip4 = 1; 1447 descend = 0; 1448 } 1449 } 1450 } 1451#endif 1452#ifdef INET6 1453 redo_ip6 = 0; 1454 if (ch_flags & PR_IP6_USER) { 1455 if (pr_flags & PR_IP6_USER) { 1456 /* Some restriction set. */ 1457 pr->pr_flags |= PR_IP6; 1458 if (ip6s >= 0) { 1459 free(pr->pr_ip6, M_PRISON); 1460 pr->pr_ip6s = ip6s; 1461 pr->pr_ip6 = ip6; 1462 ip6 = NULL; 1463 } 1464 } else if (ppr->pr_flags & PR_IP6) { 1465 /* This restriction cleared, but keep inherited. */ 1466 free(pr->pr_ip6, M_PRISON); 1467 pr->pr_ip6s = ip6s; 1468 pr->pr_ip6 = ip6; 1469 ip6 = NULL; 1470 } else { 1471 /* Restriction cleared, now unrestricted. */ 1472 pr->pr_flags &= ~PR_IP6; 1473 free(pr->pr_ip6, M_PRISON); 1474 pr->pr_ip6s = 0; 1475 } 1476 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1477 if (prison_restrict_ip6(tpr, NULL)) { 1478 redo_ip6 = 1; 1479 descend = 0; 1480 } 1481 } 1482 } 1483#endif 1484 if (gotslevel) { 1485 pr->pr_securelevel = slevel; 1486 /* Set all child jails to be at least this level. */ 1487 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1488 if (tpr->pr_securelevel < slevel) 1489 tpr->pr_securelevel = slevel; 1490 } 1491 if (gotenforce) { 1492 pr->pr_enforce_statfs = enforce; 1493 /* Pass this restriction on to the children. */ 1494 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1495 if (tpr->pr_enforce_statfs < enforce) 1496 tpr->pr_enforce_statfs = enforce; 1497 } 1498 if (name != NULL) { 1499 if (ppr == &prison0) 1500 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1501 else 1502 snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1503 ppr->pr_name, name); 1504 /* Change this component of child names. */ 1505 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1506 bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1507 strlen(tpr->pr_name + onamelen) + 1); 1508 bcopy(pr->pr_name, tpr->pr_name, namelen); 1509 } 1510 } 1511 if (path != NULL) { 1512 /* Try to keep a real-rooted full pathname. */ 1513 if (path[0] == '/' && strcmp(mypr->pr_path, "/")) 1514 snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1515 mypr->pr_path, path); 1516 else 1517 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1518 pr->pr_root = root; 1519 } 1520 if (PR_HOST & ch_flags & ~pr_flags) { 1521 if (pr->pr_flags & PR_HOST) { 1522 /* 1523 * Copy the parent's host info. As with pr_ip4 above, 1524 * the lack of a lock on the parent is not a problem; 1525 * it is always set with allprison_lock at least 1526 * shared, and is held exclusively here. 1527 */ 1528 strlcpy(pr->pr_host, pr->pr_parent->pr_host, 1529 sizeof(pr->pr_host)); 1530 strlcpy(pr->pr_domain, pr->pr_parent->pr_domain, 1531 sizeof(pr->pr_domain)); 1532 strlcpy(pr->pr_uuid, pr->pr_parent->pr_uuid, 1533 sizeof(pr->pr_uuid)); 1534 pr->pr_hostid = pr->pr_parent->pr_hostid; 1535 } 1536 } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1537 /* Set this prison, and any descendants without PR_HOST. */ 1538 if (host != NULL) 1539 strlcpy(pr->pr_host, host, sizeof(pr->pr_host)); 1540 if (domain != NULL) 1541 strlcpy(pr->pr_domain, domain, sizeof(pr->pr_domain)); 1542 if (uuid != NULL) 1543 strlcpy(pr->pr_uuid, uuid, sizeof(pr->pr_uuid)); 1544 if (gothid) 1545 pr->pr_hostid = hid; 1546 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1547 if (tpr->pr_flags & PR_HOST) 1548 descend = 0; 1549 else { 1550 if (host != NULL) 1551 strlcpy(tpr->pr_host, pr->pr_host, 1552 sizeof(tpr->pr_host)); 1553 if (domain != NULL) 1554 strlcpy(tpr->pr_domain, pr->pr_domain, 1555 sizeof(tpr->pr_domain)); 1556 if (uuid != NULL) 1557 strlcpy(tpr->pr_uuid, pr->pr_uuid, 1558 sizeof(tpr->pr_uuid)); 1559 if (gothid) 1560 tpr->pr_hostid = hid; 1561 } 1562 } 1563 } 1564 if ((tallow = ch_allow & ~pr_allow)) { 1565 /* Clear allow bits in all children. */ 1566 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1567 tpr->pr_allow &= ~tallow; 1568 } 1569 pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1570 /* 1571 * Persistent prisons get an extra reference, and prisons losing their 1572 * persist flag lose that reference. Only do this for existing prisons 1573 * for now, so new ones will remain unseen until after the module 1574 * handlers have completed. 1575 */ 1576 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1577 if (pr_flags & PR_PERSIST) { 1578 pr->pr_ref++; 1579 pr->pr_uref++; 1580 } else { 1581 pr->pr_ref--; 1582 pr->pr_uref--; 1583 } 1584 } 1585 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1586 mtx_unlock(&pr->pr_mtx); 1587 1588 /* Locks may have prevented a complete restriction of child IP 1589 * addresses. If so, allocate some more memory and try again. 1590 */ 1591#ifdef INET 1592 while (redo_ip4) { 1593 ip4s = pr->pr_ip4s; 1594 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1595 mtx_lock(&pr->pr_mtx); 1596 redo_ip4 = 0; 1597 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1598 if (prison_restrict_ip4(tpr, ip4)) { 1599 if (ip4 != NULL) 1600 ip4 = NULL; 1601 else 1602 redo_ip4 = 1; 1603 } 1604 } 1605 mtx_unlock(&pr->pr_mtx); 1606 } 1607#endif 1608#ifdef INET6 1609 while (redo_ip6) { 1610 ip6s = pr->pr_ip6s; 1611 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1612 mtx_lock(&pr->pr_mtx); 1613 redo_ip6 = 0; 1614 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1615 if (prison_restrict_ip6(tpr, ip6)) { 1616 if (ip6 != NULL) 1617 ip6 = NULL; 1618 else 1619 redo_ip6 = 1; 1620 } 1621 } 1622 mtx_unlock(&pr->pr_mtx); 1623 } 1624#endif 1625 1626 /* Let the modules do their work. */ 1627 sx_downgrade(&allprison_lock); 1628 if (created) { 1629 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1630 if (error) { 1631 prison_deref(pr, PD_LIST_SLOCKED); 1632 goto done_errmsg; 1633 } 1634 } 1635 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1636 if (error) { 1637 prison_deref(pr, created 1638 ? PD_LIST_SLOCKED 1639 : PD_DEREF | PD_LIST_SLOCKED); 1640 goto done_errmsg; 1641 } 1642 1643 /* Attach this process to the prison if requested. */ 1644 if (flags & JAIL_ATTACH) { 1645 mtx_lock(&pr->pr_mtx); 1646 error = do_jail_attach(td, pr); 1647 if (error) { 1648 vfs_opterror(opts, "attach failed"); 1649 if (!created) 1650 prison_deref(pr, PD_DEREF); 1651 goto done_errmsg; 1652 } 1653 } 1654 1655 /* 1656 * Now that it is all there, drop the temporary reference from existing 1657 * prisons. Or add a reference to newly created persistent prisons 1658 * (which was not done earlier so that the prison would not be publicly 1659 * visible). 1660 */ 1661 if (!created) { 1662 prison_deref(pr, (flags & JAIL_ATTACH) 1663 ? PD_DEREF 1664 : PD_DEREF | PD_LIST_SLOCKED); 1665 } else { 1666 if (pr_flags & PR_PERSIST) { 1667 mtx_lock(&pr->pr_mtx); 1668 pr->pr_ref++; 1669 pr->pr_uref++; 1670 mtx_unlock(&pr->pr_mtx); 1671 } 1672 if (!(flags & JAIL_ATTACH)) 1673 sx_sunlock(&allprison_lock); 1674 } 1675 td->td_retval[0] = pr->pr_id; 1676 goto done_errmsg; 1677 1678 done_deref_locked: 1679 prison_deref(pr, created 1680 ? PD_LOCKED | PD_LIST_XLOCKED 1681 : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1682 goto done_releroot; 1683 done_unlock_list: 1684 sx_xunlock(&allprison_lock); 1685 done_releroot: 1686 if (root != NULL) { 1687 vfslocked = VFS_LOCK_GIANT(root->v_mount); 1688 vrele(root); 1689 VFS_UNLOCK_GIANT(vfslocked); 1690 } 1691 done_errmsg: 1692 if (error) { 1693 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1694 if (errmsg_len > 0) { 1695 errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1696 if (errmsg_pos > 0) { 1697 if (optuio->uio_segflg == UIO_SYSSPACE) 1698 bcopy(errmsg, 1699 optuio->uio_iov[errmsg_pos].iov_base, 1700 errmsg_len); 1701 else 1702 copyout(errmsg, 1703 optuio->uio_iov[errmsg_pos].iov_base, 1704 errmsg_len); 1705 } 1706 } 1707 } 1708 done_free: 1709#ifdef INET 1710 free(ip4, M_PRISON); 1711#endif 1712#ifdef INET6 1713 free(ip6, M_PRISON); 1714#endif 1715 vfs_freeopts(opts); 1716 return (error); 1717} 1718 1719 1720/* 1721 * struct jail_get_args { 1722 * struct iovec *iovp; 1723 * unsigned int iovcnt; 1724 * int flags; 1725 * }; 1726 */ 1727int 1728jail_get(struct thread *td, struct jail_get_args *uap) 1729{ 1730 struct uio *auio; 1731 int error; 1732 1733 /* Check that we have an even number of iovecs. */ 1734 if (uap->iovcnt & 1) 1735 return (EINVAL); 1736 1737 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1738 if (error) 1739 return (error); 1740 error = kern_jail_get(td, auio, uap->flags); 1741 if (error == 0) 1742 error = copyout(auio->uio_iov, uap->iovp, 1743 uap->iovcnt * sizeof (struct iovec)); 1744 free(auio, M_IOV); 1745 return (error); 1746} 1747 1748int 1749kern_jail_get(struct thread *td, struct uio *optuio, int flags) 1750{ 1751 struct prison *pr, *mypr; 1752 struct vfsopt *opt; 1753 struct vfsoptlist *opts; 1754 char *errmsg, *name; 1755 int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; 1756 1757 if (flags & ~JAIL_GET_MASK) 1758 return (EINVAL); 1759 1760 /* Get the parameter list. */ 1761 error = vfs_buildopts(optuio, &opts); 1762 if (error) 1763 return (error); 1764 errmsg_pos = vfs_getopt_pos(opts, "errmsg"); 1765 mypr = td->td_ucred->cr_prison; 1766 1767 /* 1768 * Find the prison specified by one of: lastjid, jid, name. 1769 */ 1770 sx_slock(&allprison_lock); 1771 error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 1772 if (error == 0) { 1773 TAILQ_FOREACH(pr, &allprison, pr_list) { 1774 if (pr->pr_id > jid && prison_ischild(mypr, pr)) { 1775 mtx_lock(&pr->pr_mtx); 1776 if (pr->pr_ref > 0 && 1777 (pr->pr_uref > 0 || (flags & JAIL_DYING))) 1778 break; 1779 mtx_unlock(&pr->pr_mtx); 1780 } 1781 } 1782 if (pr != NULL) 1783 goto found_prison; 1784 error = ENOENT; 1785 vfs_opterror(opts, "no jail after %d", jid); 1786 goto done_unlock_list; 1787 } else if (error != ENOENT) 1788 goto done_unlock_list; 1789 1790 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 1791 if (error == 0) { 1792 if (jid != 0) { 1793 pr = prison_find_child(mypr, jid); 1794 if (pr != NULL) { 1795 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1796 mtx_unlock(&pr->pr_mtx); 1797 error = ENOENT; 1798 vfs_opterror(opts, "jail %d is dying", 1799 jid); 1800 goto done_unlock_list; 1801 } 1802 goto found_prison; 1803 } 1804 error = ENOENT; 1805 vfs_opterror(opts, "jail %d not found", jid); 1806 goto done_unlock_list; 1807 } 1808 } else if (error != ENOENT) 1809 goto done_unlock_list; 1810 1811 error = vfs_getopt(opts, "name", (void **)&name, &len); 1812 if (error == 0) { 1813 if (len == 0 || name[len - 1] != '\0') { 1814 error = EINVAL; 1815 goto done_unlock_list; 1816 } 1817 pr = prison_find_name(mypr, name); 1818 if (pr != NULL) { 1819 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1820 mtx_unlock(&pr->pr_mtx); 1821 error = ENOENT; 1822 vfs_opterror(opts, "jail \"%s\" is dying", 1823 name); 1824 goto done_unlock_list; 1825 } 1826 goto found_prison; 1827 } 1828 error = ENOENT; 1829 vfs_opterror(opts, "jail \"%s\" not found", name); 1830 goto done_unlock_list; 1831 } else if (error != ENOENT) 1832 goto done_unlock_list; 1833 1834 vfs_opterror(opts, "no jail specified"); 1835 error = ENOENT; 1836 goto done_unlock_list; 1837 1838 found_prison: 1839 /* Get the parameters of the prison. */ 1840 pr->pr_ref++; 1841 locked = PD_LOCKED; 1842 td->td_retval[0] = pr->pr_id; 1843 error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 1844 if (error != 0 && error != ENOENT) 1845 goto done_deref; 1846 i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id; 1847 error = vfs_setopt(opts, "parent", &i, sizeof(i)); 1848 if (error != 0 && error != ENOENT) 1849 goto done_deref; 1850 error = vfs_setopts(opts, "name", prison_name(mypr, pr)); 1851 if (error != 0 && error != ENOENT) 1852 goto done_deref; 1853 error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, 1854 sizeof(pr->pr_cpuset->cs_id)); 1855 if (error != 0 && error != ENOENT) 1856 goto done_deref; 1857 error = vfs_setopts(opts, "path", prison_path(mypr, pr)); 1858 if (error != 0 && error != ENOENT) 1859 goto done_deref; 1860#ifdef INET 1861 error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 1862 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1863 if (error != 0 && error != ENOENT) 1864 goto done_deref; 1865#endif 1866#ifdef INET6 1867 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 1868 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1869 if (error != 0 && error != ENOENT) 1870 goto done_deref; 1871#endif 1872 error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 1873 sizeof(pr->pr_securelevel)); 1874 if (error != 0 && error != ENOENT) 1875 goto done_deref; 1876 error = vfs_setopts(opts, "host.hostname", pr->pr_host); 1877 if (error != 0 && error != ENOENT) 1878 goto done_deref; 1879 error = vfs_setopts(opts, "host.domainname", pr->pr_domain); 1880 if (error != 0 && error != ENOENT) 1881 goto done_deref; 1882 error = vfs_setopts(opts, "host.hostuuid", pr->pr_uuid); 1883 if (error != 0 && error != ENOENT) 1884 goto done_deref; 1885#ifdef COMPAT_IA32 1886 if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 1887 uint32_t hid32 = pr->pr_hostid; 1888 1889 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); 1890 } else 1891#endif 1892 error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, 1893 sizeof(pr->pr_hostid)); 1894 if (error != 0 && error != ENOENT) 1895 goto done_deref; 1896 error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, 1897 sizeof(pr->pr_enforce_statfs)); 1898 if (error != 0 && error != ENOENT) 1899 goto done_deref; 1900 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 1901 fi++) { 1902 if (pr_flag_names[fi] == NULL) 1903 continue; 1904 i = (pr->pr_flags & (1 << fi)) ? 1 : 0; 1905 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); 1906 if (error != 0 && error != ENOENT) 1907 goto done_deref; 1908 i = !i; 1909 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); 1910 if (error != 0 && error != ENOENT) 1911 goto done_deref; 1912 } 1913 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 1914 fi++) { 1915 if (pr_allow_names[fi] == NULL) 1916 continue; 1917 i = (pr->pr_allow & (1 << fi)) ? 1 : 0; 1918 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); 1919 if (error != 0 && error != ENOENT) 1920 goto done_deref; 1921 i = !i; 1922 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); 1923 if (error != 0 && error != ENOENT) 1924 goto done_deref; 1925 } 1926 i = (pr->pr_uref == 0); 1927 error = vfs_setopt(opts, "dying", &i, sizeof(i)); 1928 if (error != 0 && error != ENOENT) 1929 goto done_deref; 1930 i = !i; 1931 error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 1932 if (error != 0 && error != ENOENT) 1933 goto done_deref; 1934 1935 /* Get the module parameters. */ 1936 mtx_unlock(&pr->pr_mtx); 1937 locked = 0; 1938 error = osd_jail_call(pr, PR_METHOD_GET, opts); 1939 if (error) 1940 goto done_deref; 1941 prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 1942 1943 /* By now, all parameters should have been noted. */ 1944 TAILQ_FOREACH(opt, opts, link) { 1945 if (!opt->seen && strcmp(opt->name, "errmsg")) { 1946 error = EINVAL; 1947 vfs_opterror(opts, "unknown parameter: %s", opt->name); 1948 goto done_errmsg; 1949 } 1950 } 1951 1952 /* Write the fetched parameters back to userspace. */ 1953 error = 0; 1954 TAILQ_FOREACH(opt, opts, link) { 1955 if (opt->pos >= 0 && opt->pos != errmsg_pos) { 1956 pos = 2 * opt->pos + 1; 1957 optuio->uio_iov[pos].iov_len = opt->len; 1958 if (opt->value != NULL) { 1959 if (optuio->uio_segflg == UIO_SYSSPACE) { 1960 bcopy(opt->value, 1961 optuio->uio_iov[pos].iov_base, 1962 opt->len); 1963 } else { 1964 error = copyout(opt->value, 1965 optuio->uio_iov[pos].iov_base, 1966 opt->len); 1967 if (error) 1968 break; 1969 } 1970 } 1971 } 1972 } 1973 goto done_errmsg; 1974 1975 done_deref: 1976 prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 1977 goto done_errmsg; 1978 1979 done_unlock_list: 1980 sx_sunlock(&allprison_lock); 1981 done_errmsg: 1982 if (error && errmsg_pos >= 0) { 1983 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1984 errmsg_pos = 2 * errmsg_pos + 1; 1985 if (errmsg_len > 0) { 1986 if (optuio->uio_segflg == UIO_SYSSPACE) 1987 bcopy(errmsg, 1988 optuio->uio_iov[errmsg_pos].iov_base, 1989 errmsg_len); 1990 else 1991 copyout(errmsg, 1992 optuio->uio_iov[errmsg_pos].iov_base, 1993 errmsg_len); 1994 } 1995 } 1996 vfs_freeopts(opts); 1997 return (error); 1998} 1999 2000 2001/* 2002 * struct jail_remove_args { 2003 * int jid; 2004 * }; 2005 */ 2006int 2007jail_remove(struct thread *td, struct jail_remove_args *uap) 2008{ 2009 struct prison *pr, *cpr, *lpr, *tpr; 2010 int descend, error; 2011 2012 error = priv_check(td, PRIV_JAIL_REMOVE); 2013 if (error) 2014 return (error); 2015 2016 sx_xlock(&allprison_lock); 2017 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2018 if (pr == NULL) { 2019 sx_xunlock(&allprison_lock); 2020 return (EINVAL); 2021 } 2022 2023 /* Remove all descendants of this prison, then remove this prison. */ 2024 pr->pr_ref++; 2025 pr->pr_flags |= PR_REMOVE; 2026 if (!LIST_EMPTY(&pr->pr_children)) { 2027 mtx_unlock(&pr->pr_mtx); 2028 lpr = NULL; 2029 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 2030 mtx_lock(&cpr->pr_mtx); 2031 if (cpr->pr_ref > 0) { 2032 tpr = cpr; 2033 cpr->pr_ref++; 2034 cpr->pr_flags |= PR_REMOVE; 2035 } else { 2036 /* Already removed - do not do it again. */ 2037 tpr = NULL; 2038 } 2039 mtx_unlock(&cpr->pr_mtx); 2040 if (lpr != NULL) { 2041 mtx_lock(&lpr->pr_mtx); 2042 prison_remove_one(lpr); 2043 sx_xlock(&allprison_lock); 2044 } 2045 lpr = tpr; 2046 } 2047 if (lpr != NULL) { 2048 mtx_lock(&lpr->pr_mtx); 2049 prison_remove_one(lpr); 2050 sx_xlock(&allprison_lock); 2051 } 2052 mtx_lock(&pr->pr_mtx); 2053 } 2054 prison_remove_one(pr); 2055 return (0); 2056} 2057 2058static void 2059prison_remove_one(struct prison *pr) 2060{ 2061 struct proc *p; 2062 int deuref; 2063 2064 /* If the prison was persistent, it is not anymore. */ 2065 deuref = 0; 2066 if (pr->pr_flags & PR_PERSIST) { 2067 pr->pr_ref--; 2068 deuref = PD_DEUREF; 2069 pr->pr_flags &= ~PR_PERSIST; 2070 } 2071 2072 /* 2073 * jail_remove added a reference. If that's the only one, remove 2074 * the prison now. 2075 */ 2076 KASSERT(pr->pr_ref > 0, 2077 ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); 2078 if (pr->pr_ref == 1) { 2079 prison_deref(pr, 2080 deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 2081 return; 2082 } 2083 2084 mtx_unlock(&pr->pr_mtx); 2085 sx_xunlock(&allprison_lock); 2086 /* 2087 * Kill all processes unfortunate enough to be attached to this prison. 2088 */ 2089 sx_slock(&allproc_lock); 2090 LIST_FOREACH(p, &allproc, p_list) { 2091 PROC_LOCK(p); 2092 if (p->p_state != PRS_NEW && p->p_ucred && 2093 p->p_ucred->cr_prison == pr) 2094 psignal(p, SIGKILL); 2095 PROC_UNLOCK(p); 2096 } 2097 sx_sunlock(&allproc_lock); 2098 /* Remove the temporary reference added by jail_remove. */ 2099 prison_deref(pr, deuref | PD_DEREF); 2100} 2101 2102 2103/* 2104 * struct jail_attach_args { 2105 * int jid; 2106 * }; 2107 */ 2108int 2109jail_attach(struct thread *td, struct jail_attach_args *uap) 2110{ 2111 struct prison *pr; 2112 int error; 2113 2114 error = priv_check(td, PRIV_JAIL_ATTACH); 2115 if (error) 2116 return (error); 2117 2118 sx_slock(&allprison_lock); 2119 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2120 if (pr == NULL) { 2121 sx_sunlock(&allprison_lock); 2122 return (EINVAL); 2123 } 2124 2125 /* 2126 * Do not allow a process to attach to a prison that is not 2127 * considered to be "alive". 2128 */ 2129 if (pr->pr_uref == 0) { 2130 mtx_unlock(&pr->pr_mtx); 2131 sx_sunlock(&allprison_lock); 2132 return (EINVAL); 2133 } 2134 2135 return (do_jail_attach(td, pr)); 2136} 2137 2138static int 2139do_jail_attach(struct thread *td, struct prison *pr) 2140{ 2141 struct prison *ppr; 2142 struct proc *p; 2143 struct ucred *newcred, *oldcred; 2144 int vfslocked, error; 2145 2146 /* 2147 * XXX: Note that there is a slight race here if two threads 2148 * in the same privileged process attempt to attach to two 2149 * different jails at the same time. It is important for 2150 * user processes not to do this, or they might end up with 2151 * a process root from one prison, but attached to the jail 2152 * of another. 2153 */ 2154 pr->pr_ref++; 2155 pr->pr_uref++; 2156 mtx_unlock(&pr->pr_mtx); 2157 2158 /* Let modules do whatever they need to prepare for attaching. */ 2159 error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 2160 if (error) { 2161 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 2162 return (error); 2163 } 2164 sx_sunlock(&allprison_lock); 2165 2166 /* 2167 * Reparent the newly attached process to this jail. 2168 */ 2169 ppr = td->td_ucred->cr_prison; 2170 p = td->td_proc; 2171 error = cpuset_setproc_update_set(p, pr->pr_cpuset); 2172 if (error) 2173 goto e_revert_osd; 2174 2175 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2176 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 2177 if ((error = change_dir(pr->pr_root, td)) != 0) 2178 goto e_unlock; 2179#ifdef MAC 2180 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 2181 goto e_unlock; 2182#endif 2183 VOP_UNLOCK(pr->pr_root, 0); 2184 if ((error = change_root(pr->pr_root, td))) 2185 goto e_unlock_giant; 2186 VFS_UNLOCK_GIANT(vfslocked); 2187 2188 newcred = crget(); 2189 PROC_LOCK(p); 2190 oldcred = p->p_ucred; 2191 setsugid(p); 2192 crcopy(newcred, oldcred); 2193 newcred->cr_prison = pr; 2194 p->p_ucred = newcred; 2195 PROC_UNLOCK(p); 2196 crfree(oldcred); 2197 prison_deref(ppr, PD_DEREF | PD_DEUREF); 2198 return (0); 2199 e_unlock: 2200 VOP_UNLOCK(pr->pr_root, 0); 2201 e_unlock_giant: 2202 VFS_UNLOCK_GIANT(vfslocked); 2203 e_revert_osd: 2204 /* Tell modules this thread is still in its old jail after all. */ 2205 (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); 2206 prison_deref(pr, PD_DEREF | PD_DEUREF); 2207 return (error); 2208} 2209 2210 2211/* 2212 * Returns a locked prison instance, or NULL on failure. 2213 */ 2214struct prison * 2215prison_find(int prid) 2216{ 2217 struct prison *pr; 2218 2219 sx_assert(&allprison_lock, SX_LOCKED); 2220 TAILQ_FOREACH(pr, &allprison, pr_list) { 2221 if (pr->pr_id == prid) { 2222 mtx_lock(&pr->pr_mtx); 2223 if (pr->pr_ref > 0) 2224 return (pr); 2225 mtx_unlock(&pr->pr_mtx); 2226 } 2227 } 2228 return (NULL); 2229} 2230 2231/* 2232 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 2233 */ 2234struct prison * 2235prison_find_child(struct prison *mypr, int prid) 2236{ 2237 struct prison *pr; 2238 int descend; 2239 2240 sx_assert(&allprison_lock, SX_LOCKED); 2241 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2242 if (pr->pr_id == prid) { 2243 mtx_lock(&pr->pr_mtx); 2244 if (pr->pr_ref > 0) 2245 return (pr); 2246 mtx_unlock(&pr->pr_mtx); 2247 } 2248 } 2249 return (NULL); 2250} 2251 2252/* 2253 * Look for the name relative to mypr. Returns a locked prison or NULL. 2254 */ 2255struct prison * 2256prison_find_name(struct prison *mypr, const char *name) 2257{ 2258 struct prison *pr, *deadpr; 2259 size_t mylen; 2260 int descend; 2261 2262 sx_assert(&allprison_lock, SX_LOCKED); 2263 mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2264 again: 2265 deadpr = NULL; 2266 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2267 if (!strcmp(pr->pr_name + mylen, name)) { 2268 mtx_lock(&pr->pr_mtx); 2269 if (pr->pr_ref > 0) { 2270 if (pr->pr_uref > 0) 2271 return (pr); 2272 deadpr = pr; 2273 } 2274 mtx_unlock(&pr->pr_mtx); 2275 } 2276 } 2277 /* There was no valid prison - perhaps there was a dying one. */ 2278 if (deadpr != NULL) { 2279 mtx_lock(&deadpr->pr_mtx); 2280 if (deadpr->pr_ref == 0) { 2281 mtx_unlock(&deadpr->pr_mtx); 2282 goto again; 2283 } 2284 } 2285 return (deadpr); 2286} 2287 2288/* 2289 * See if a prison has the specific flag set. 2290 */ 2291int 2292prison_flag(struct ucred *cred, unsigned flag) 2293{ 2294 2295 /* This is an atomic read, so no locking is necessary. */ 2296 return (cred->cr_prison->pr_flags & flag); 2297} 2298 2299int 2300prison_allow(struct ucred *cred, unsigned flag) 2301{ 2302 2303 /* This is an atomic read, so no locking is necessary. */ 2304 return (cred->cr_prison->pr_allow & flag); 2305} 2306 2307/* 2308 * Remove a prison reference. If that was the last reference, remove the 2309 * prison itself - but not in this context in case there are locks held. 2310 */ 2311void 2312prison_free_locked(struct prison *pr) 2313{ 2314 2315 mtx_assert(&pr->pr_mtx, MA_OWNED); 2316 pr->pr_ref--; 2317 if (pr->pr_ref == 0) { 2318 mtx_unlock(&pr->pr_mtx); 2319 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 2320 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 2321 return; 2322 } 2323 mtx_unlock(&pr->pr_mtx); 2324} 2325 2326void 2327prison_free(struct prison *pr) 2328{ 2329 2330 mtx_lock(&pr->pr_mtx); 2331 prison_free_locked(pr); 2332} 2333 2334static void 2335prison_complete(void *context, int pending) 2336{ 2337 2338 prison_deref((struct prison *)context, 0); 2339} 2340 2341/* 2342 * Remove a prison reference (usually). This internal version assumes no 2343 * mutexes are held, except perhaps the prison itself. If there are no more 2344 * references, release and delist the prison. On completion, the prison lock 2345 * and the allprison lock are both unlocked. 2346 */ 2347static void 2348prison_deref(struct prison *pr, int flags) 2349{ 2350 struct prison *ppr, *tpr; 2351 int vfslocked; 2352 2353 if (!(flags & PD_LOCKED)) 2354 mtx_lock(&pr->pr_mtx); 2355 /* Decrement the user references in a separate loop. */ 2356 if (flags & PD_DEUREF) { 2357 for (tpr = pr;; tpr = tpr->pr_parent) { 2358 if (tpr != pr) 2359 mtx_lock(&tpr->pr_mtx); 2360 if (--tpr->pr_uref > 0) 2361 break; 2362 KASSERT(tpr != &prison0, ("prison0 pr_uref=0")); 2363 mtx_unlock(&tpr->pr_mtx); 2364 } 2365 /* Done if there were only user references to remove. */ 2366 if (!(flags & PD_DEREF)) { 2367 mtx_unlock(&tpr->pr_mtx); 2368 if (flags & PD_LIST_SLOCKED) 2369 sx_sunlock(&allprison_lock); 2370 else if (flags & PD_LIST_XLOCKED) 2371 sx_xunlock(&allprison_lock); 2372 return; 2373 } 2374 if (tpr != pr) { 2375 mtx_unlock(&tpr->pr_mtx); 2376 mtx_lock(&pr->pr_mtx); 2377 } 2378 } 2379 2380 for (;;) { 2381 if (flags & PD_DEREF) 2382 pr->pr_ref--; 2383 /* If the prison still has references, nothing else to do. */ 2384 if (pr->pr_ref > 0) { 2385 mtx_unlock(&pr->pr_mtx); 2386 if (flags & PD_LIST_SLOCKED) 2387 sx_sunlock(&allprison_lock); 2388 else if (flags & PD_LIST_XLOCKED) 2389 sx_xunlock(&allprison_lock); 2390 return; 2391 } 2392 2393 mtx_unlock(&pr->pr_mtx); 2394 if (flags & PD_LIST_SLOCKED) { 2395 if (!sx_try_upgrade(&allprison_lock)) { 2396 sx_sunlock(&allprison_lock); 2397 sx_xlock(&allprison_lock); 2398 } 2399 } else if (!(flags & PD_LIST_XLOCKED)) 2400 sx_xlock(&allprison_lock); 2401 2402 TAILQ_REMOVE(&allprison, pr, pr_list); 2403 LIST_REMOVE(pr, pr_sibling); 2404 ppr = pr->pr_parent; 2405 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 2406 tpr->pr_prisoncount--; 2407 sx_downgrade(&allprison_lock); 2408 2409 if (pr->pr_root != NULL) { 2410 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2411 vrele(pr->pr_root); 2412 VFS_UNLOCK_GIANT(vfslocked); 2413 } 2414 mtx_destroy(&pr->pr_mtx); 2415#ifdef INET 2416 free(pr->pr_ip4, M_PRISON); 2417#endif 2418#ifdef INET6 2419 free(pr->pr_ip6, M_PRISON); 2420#endif 2421 if (pr->pr_cpuset != NULL) 2422 cpuset_rel(pr->pr_cpuset); 2423 osd_jail_exit(pr); 2424 free(pr, M_PRISON); 2425 2426 /* Removing a prison frees a reference on its parent. */ 2427 pr = ppr; 2428 mtx_lock(&pr->pr_mtx); 2429 flags = PD_DEREF | PD_LIST_SLOCKED; 2430 } 2431} 2432 2433void 2434prison_hold_locked(struct prison *pr) 2435{ 2436 2437 mtx_assert(&pr->pr_mtx, MA_OWNED); 2438 KASSERT(pr->pr_ref > 0, 2439 ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 2440 pr->pr_ref++; 2441} 2442 2443void 2444prison_hold(struct prison *pr) 2445{ 2446 2447 mtx_lock(&pr->pr_mtx); 2448 prison_hold_locked(pr); 2449 mtx_unlock(&pr->pr_mtx); 2450} 2451 2452void 2453prison_proc_hold(struct prison *pr) 2454{ 2455 2456 mtx_lock(&pr->pr_mtx); 2457 KASSERT(pr->pr_uref > 0, 2458 ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 2459 pr->pr_uref++; 2460 mtx_unlock(&pr->pr_mtx); 2461} 2462 2463void 2464prison_proc_free(struct prison *pr) 2465{ 2466 2467 mtx_lock(&pr->pr_mtx); 2468 KASSERT(pr->pr_uref > 0, 2469 ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 2470 prison_deref(pr, PD_DEUREF | PD_LOCKED); 2471} 2472 2473 2474#ifdef INET 2475/* 2476 * Restrict a prison's IP address list with its parent's, possibly replacing 2477 * it. Return true if the replacement buffer was used (or would have been). 2478 */ 2479static int 2480prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 2481{ 2482 int ii, ij, used; 2483 struct prison *ppr; 2484 2485 ppr = pr->pr_parent; 2486 if (!(pr->pr_flags & PR_IP4_USER)) { 2487 /* This has no user settings, so just copy the parent's list. */ 2488 if (pr->pr_ip4s < ppr->pr_ip4s) { 2489 /* 2490 * There's no room for the parent's list. Use the 2491 * new list buffer, which is assumed to be big enough 2492 * (if it was passed). If there's no buffer, try to 2493 * allocate one. 2494 */ 2495 used = 1; 2496 if (newip4 == NULL) { 2497 newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 2498 M_PRISON, M_NOWAIT); 2499 if (newip4 != NULL) 2500 used = 0; 2501 } 2502 if (newip4 != NULL) { 2503 bcopy(ppr->pr_ip4, newip4, 2504 ppr->pr_ip4s * sizeof(*newip4)); 2505 free(pr->pr_ip4, M_PRISON); 2506 pr->pr_ip4 = newip4; 2507 pr->pr_ip4s = ppr->pr_ip4s; 2508 pr->pr_flags |= PR_IP4; 2509 } 2510 return (used); 2511 } 2512 pr->pr_ip4s = ppr->pr_ip4s; 2513 if (pr->pr_ip4s > 0) 2514 bcopy(ppr->pr_ip4, pr->pr_ip4, 2515 pr->pr_ip4s * sizeof(*newip4)); 2516 else if (pr->pr_ip4 != NULL) { 2517 free(pr->pr_ip4, M_PRISON); 2518 pr->pr_ip4 = NULL; 2519 } 2520 pr->pr_flags = 2521 (pr->pr_flags & ~PR_IP4) | (ppr->pr_flags & PR_IP4); 2522 } else if (pr->pr_ip4s > 0 && (ppr->pr_flags & PR_IP4)) { 2523 /* Remove addresses that aren't in the parent. */ 2524 for (ij = 0; ij < ppr->pr_ip4s; ij++) 2525 if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 2526 break; 2527 if (ij < ppr->pr_ip4s) 2528 ii = 1; 2529 else { 2530 bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 2531 --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2532 ii = 0; 2533 } 2534 for (ij = 1; ii < pr->pr_ip4s; ) { 2535 if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 2536 ii++; 2537 continue; 2538 } 2539 switch (ij >= ppr->pr_ip4s ? -1 : 2540 qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 2541 case -1: 2542 bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 2543 (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 2544 break; 2545 case 0: 2546 ii++; 2547 ij++; 2548 break; 2549 case 1: 2550 ij++; 2551 break; 2552 } 2553 } 2554 if (pr->pr_ip4s == 0) { 2555 free(pr->pr_ip4, M_PRISON); 2556 pr->pr_ip4 = NULL; 2557 } 2558 } 2559 return (0); 2560} 2561 2562/* 2563 * Pass back primary IPv4 address of this jail. 2564 * 2565 * If not restricted return success but do not alter the address. Caller has 2566 * to make sure to initialize it correctly (e.g. INADDR_ANY). 2567 * 2568 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2569 * Address returned in NBO. 2570 */ 2571int 2572prison_get_ip4(struct ucred *cred, struct in_addr *ia) 2573{ 2574 struct prison *pr; 2575 2576 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2577 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2578 2579 pr = cred->cr_prison; 2580 if (!(pr->pr_flags & PR_IP4)) 2581 return (0); 2582 mtx_lock(&pr->pr_mtx); 2583 if (!(pr->pr_flags & PR_IP4)) { 2584 mtx_unlock(&pr->pr_mtx); 2585 return (0); 2586 } 2587 if (pr->pr_ip4 == NULL) { 2588 mtx_unlock(&pr->pr_mtx); 2589 return (EAFNOSUPPORT); 2590 } 2591 2592 ia->s_addr = pr->pr_ip4[0].s_addr; 2593 mtx_unlock(&pr->pr_mtx); 2594 return (0); 2595} 2596 2597/* 2598 * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2599 */ 2600int 2601prison_equal_ip4(struct prison *pr1, struct prison *pr2) 2602{ 2603 2604 if (pr1 == pr2) 2605 return (1); 2606 2607 /* 2608 * jail_set maintains an exclusive hold on allprison_lock while it 2609 * changes the IP addresses, so only a shared hold is needed. This is 2610 * easier than locking the two prisons which would require finding the 2611 * proper locking order and end up needing allprison_lock anyway. 2612 */ 2613 sx_slock(&allprison_lock); 2614 while (pr1 != &prison0 && !(pr1->pr_flags & PR_IP4_USER)) 2615 pr1 = pr1->pr_parent; 2616 while (pr2 != &prison0 && !(pr2->pr_flags & PR_IP4_USER)) 2617 pr2 = pr2->pr_parent; 2618 sx_sunlock(&allprison_lock); 2619 return (pr1 == pr2); 2620} 2621 2622/* 2623 * Make sure our (source) address is set to something meaningful to this 2624 * jail. 2625 * 2626 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2627 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2628 * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 2629 */ 2630int 2631prison_local_ip4(struct ucred *cred, struct in_addr *ia) 2632{ 2633 struct prison *pr; 2634 struct in_addr ia0; 2635 int error; 2636 2637 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2638 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2639 2640 pr = cred->cr_prison; 2641 if (!(pr->pr_flags & PR_IP4)) 2642 return (0); 2643 mtx_lock(&pr->pr_mtx); 2644 if (!(pr->pr_flags & PR_IP4)) { 2645 mtx_unlock(&pr->pr_mtx); 2646 return (0); 2647 } 2648 if (pr->pr_ip4 == NULL) { 2649 mtx_unlock(&pr->pr_mtx); 2650 return (EAFNOSUPPORT); 2651 } 2652 2653 ia0.s_addr = ntohl(ia->s_addr); 2654 if (ia0.s_addr == INADDR_LOOPBACK) { 2655 ia->s_addr = pr->pr_ip4[0].s_addr; 2656 mtx_unlock(&pr->pr_mtx); 2657 return (0); 2658 } 2659 2660 if (ia0.s_addr == INADDR_ANY) { 2661 /* 2662 * In case there is only 1 IPv4 address, bind directly. 2663 */ 2664 if (pr->pr_ip4s == 1) 2665 ia->s_addr = pr->pr_ip4[0].s_addr; 2666 mtx_unlock(&pr->pr_mtx); 2667 return (0); 2668 } 2669 2670 error = _prison_check_ip4(pr, ia); 2671 mtx_unlock(&pr->pr_mtx); 2672 return (error); 2673} 2674 2675/* 2676 * Rewrite destination address in case we will connect to loopback address. 2677 * 2678 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2679 * Address passed in in NBO and returned in NBO. 2680 */ 2681int 2682prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 2683{ 2684 struct prison *pr; 2685 2686 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2687 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2688 2689 pr = cred->cr_prison; 2690 if (!(pr->pr_flags & PR_IP4)) 2691 return (0); 2692 mtx_lock(&pr->pr_mtx); 2693 if (!(pr->pr_flags & PR_IP4)) { 2694 mtx_unlock(&pr->pr_mtx); 2695 return (0); 2696 } 2697 if (pr->pr_ip4 == NULL) { 2698 mtx_unlock(&pr->pr_mtx); 2699 return (EAFNOSUPPORT); 2700 } 2701 2702 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 2703 ia->s_addr = pr->pr_ip4[0].s_addr; 2704 mtx_unlock(&pr->pr_mtx); 2705 return (0); 2706 } 2707 2708 /* 2709 * Return success because nothing had to be changed. 2710 */ 2711 mtx_unlock(&pr->pr_mtx); 2712 return (0); 2713} 2714 2715/* 2716 * Check if given address belongs to the jail referenced by cred/prison. 2717 * 2718 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2719 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2720 * doesn't allow IPv4. Address passed in in NBO. 2721 */ 2722static int 2723_prison_check_ip4(struct prison *pr, struct in_addr *ia) 2724{ 2725 int i, a, z, d; 2726 2727 /* 2728 * Check the primary IP. 2729 */ 2730 if (pr->pr_ip4[0].s_addr == ia->s_addr) 2731 return (0); 2732 2733 /* 2734 * All the other IPs are sorted so we can do a binary search. 2735 */ 2736 a = 0; 2737 z = pr->pr_ip4s - 2; 2738 while (a <= z) { 2739 i = (a + z) / 2; 2740 d = qcmp_v4(&pr->pr_ip4[i+1], ia); 2741 if (d > 0) 2742 z = i - 1; 2743 else if (d < 0) 2744 a = i + 1; 2745 else 2746 return (0); 2747 } 2748 2749 return (EADDRNOTAVAIL); 2750} 2751 2752int 2753prison_check_ip4(struct ucred *cred, struct in_addr *ia) 2754{ 2755 struct prison *pr; 2756 int error; 2757 2758 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2759 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2760 2761 pr = cred->cr_prison; 2762 if (!(pr->pr_flags & PR_IP4)) 2763 return (0); 2764 mtx_lock(&pr->pr_mtx); 2765 if (!(pr->pr_flags & PR_IP4)) { 2766 mtx_unlock(&pr->pr_mtx); 2767 return (0); 2768 } 2769 if (pr->pr_ip4 == NULL) { 2770 mtx_unlock(&pr->pr_mtx); 2771 return (EAFNOSUPPORT); 2772 } 2773 2774 error = _prison_check_ip4(pr, ia); 2775 mtx_unlock(&pr->pr_mtx); 2776 return (error); 2777} 2778#endif 2779 2780#ifdef INET6 2781static int 2782prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) 2783{ 2784 int ii, ij, used; 2785 struct prison *ppr; 2786 2787 ppr = pr->pr_parent; 2788 if (!(pr->pr_flags & PR_IP6_USER)) { 2789 /* This has no user settings, so just copy the parent's list. */ 2790 if (pr->pr_ip6s < ppr->pr_ip6s) { 2791 /* 2792 * There's no room for the parent's list. Use the 2793 * new list buffer, which is assumed to be big enough 2794 * (if it was passed). If there's no buffer, try to 2795 * allocate one. 2796 */ 2797 used = 1; 2798 if (newip6 == NULL) { 2799 newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), 2800 M_PRISON, M_NOWAIT); 2801 if (newip6 != NULL) 2802 used = 0; 2803 } 2804 if (newip6 != NULL) { 2805 bcopy(ppr->pr_ip6, newip6, 2806 ppr->pr_ip6s * sizeof(*newip6)); 2807 free(pr->pr_ip6, M_PRISON); 2808 pr->pr_ip6 = newip6; 2809 pr->pr_ip6s = ppr->pr_ip6s; 2810 pr->pr_flags |= PR_IP6; 2811 } 2812 return (used); 2813 } 2814 pr->pr_ip6s = ppr->pr_ip6s; 2815 if (pr->pr_ip6s > 0) 2816 bcopy(ppr->pr_ip6, pr->pr_ip6, 2817 pr->pr_ip6s * sizeof(*newip6)); 2818 else if (pr->pr_ip6 != NULL) { 2819 free(pr->pr_ip6, M_PRISON); 2820 pr->pr_ip6 = NULL; 2821 } 2822 pr->pr_flags = 2823 (pr->pr_flags & ~PR_IP6) | (ppr->pr_flags & PR_IP6); 2824 } else if (pr->pr_ip6s > 0 && (ppr->pr_flags & PR_IP6)) { 2825 /* Remove addresses that aren't in the parent. */ 2826 for (ij = 0; ij < ppr->pr_ip6s; ij++) 2827 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], 2828 &ppr->pr_ip6[ij])) 2829 break; 2830 if (ij < ppr->pr_ip6s) 2831 ii = 1; 2832 else { 2833 bcopy(pr->pr_ip6 + 1, pr->pr_ip6, 2834 --pr->pr_ip6s * sizeof(*pr->pr_ip6)); 2835 ii = 0; 2836 } 2837 for (ij = 1; ii < pr->pr_ip6s; ) { 2838 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], 2839 &ppr->pr_ip6[0])) { 2840 ii++; 2841 continue; 2842 } 2843 switch (ij >= ppr->pr_ip4s ? -1 : 2844 qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { 2845 case -1: 2846 bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, 2847 (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); 2848 break; 2849 case 0: 2850 ii++; 2851 ij++; 2852 break; 2853 case 1: 2854 ij++; 2855 break; 2856 } 2857 } 2858 if (pr->pr_ip6s == 0) { 2859 free(pr->pr_ip6, M_PRISON); 2860 pr->pr_ip6 = NULL; 2861 } 2862 } 2863 return 0; 2864} 2865 2866/* 2867 * Pass back primary IPv6 address for this jail. 2868 * 2869 * If not restricted return success but do not alter the address. Caller has 2870 * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 2871 * 2872 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 2873 */ 2874int 2875prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 2876{ 2877 struct prison *pr; 2878 2879 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2880 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2881 2882 pr = cred->cr_prison; 2883 if (!(pr->pr_flags & PR_IP6)) 2884 return (0); 2885 mtx_lock(&pr->pr_mtx); 2886 if (!(pr->pr_flags & PR_IP6)) { 2887 mtx_unlock(&pr->pr_mtx); 2888 return (0); 2889 } 2890 if (pr->pr_ip6 == NULL) { 2891 mtx_unlock(&pr->pr_mtx); 2892 return (EAFNOSUPPORT); 2893 } 2894 2895 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2896 mtx_unlock(&pr->pr_mtx); 2897 return (0); 2898} 2899 2900/* 2901 * Return true if pr1 and pr2 have the same IPv6 address restrictions. 2902 */ 2903int 2904prison_equal_ip6(struct prison *pr1, struct prison *pr2) 2905{ 2906 2907 if (pr1 == pr2) 2908 return (1); 2909 2910 sx_slock(&allprison_lock); 2911 while (pr1 != &prison0 && !(pr1->pr_flags & PR_IP6_USER)) 2912 pr1 = pr1->pr_parent; 2913 while (pr2 != &prison0 && !(pr2->pr_flags & PR_IP6_USER)) 2914 pr2 = pr2->pr_parent; 2915 sx_sunlock(&allprison_lock); 2916 return (pr1 == pr2); 2917} 2918 2919/* 2920 * Make sure our (source) address is set to something meaningful to this jail. 2921 * 2922 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 2923 * when needed while binding. 2924 * 2925 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 2926 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2927 * doesn't allow IPv6. 2928 */ 2929int 2930prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 2931{ 2932 struct prison *pr; 2933 int error; 2934 2935 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2936 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2937 2938 pr = cred->cr_prison; 2939 if (!(pr->pr_flags & PR_IP6)) 2940 return (0); 2941 mtx_lock(&pr->pr_mtx); 2942 if (!(pr->pr_flags & PR_IP6)) { 2943 mtx_unlock(&pr->pr_mtx); 2944 return (0); 2945 } 2946 if (pr->pr_ip6 == NULL) { 2947 mtx_unlock(&pr->pr_mtx); 2948 return (EAFNOSUPPORT); 2949 } 2950 2951 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 2952 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2953 mtx_unlock(&pr->pr_mtx); 2954 return (0); 2955 } 2956 2957 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 2958 /* 2959 * In case there is only 1 IPv6 address, and v6only is true, 2960 * then bind directly. 2961 */ 2962 if (v6only != 0 && pr->pr_ip6s == 1) 2963 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2964 mtx_unlock(&pr->pr_mtx); 2965 return (0); 2966 } 2967 2968 error = _prison_check_ip6(pr, ia6); 2969 mtx_unlock(&pr->pr_mtx); 2970 return (error); 2971} 2972 2973/* 2974 * Rewrite destination address in case we will connect to loopback address. 2975 * 2976 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 2977 */ 2978int 2979prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 2980{ 2981 struct prison *pr; 2982 2983 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2984 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2985 2986 pr = cred->cr_prison; 2987 if (!(pr->pr_flags & PR_IP6)) 2988 return (0); 2989 mtx_lock(&pr->pr_mtx); 2990 if (!(pr->pr_flags & PR_IP6)) { 2991 mtx_unlock(&pr->pr_mtx); 2992 return (0); 2993 } 2994 if (pr->pr_ip6 == NULL) { 2995 mtx_unlock(&pr->pr_mtx); 2996 return (EAFNOSUPPORT); 2997 } 2998 2999 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3000 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3001 mtx_unlock(&pr->pr_mtx); 3002 return (0); 3003 } 3004 3005 /* 3006 * Return success because nothing had to be changed. 3007 */ 3008 mtx_unlock(&pr->pr_mtx); 3009 return (0); 3010} 3011 3012/* 3013 * Check if given address belongs to the jail referenced by cred/prison. 3014 * 3015 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3016 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3017 * doesn't allow IPv6. 3018 */ 3019static int 3020_prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 3021{ 3022 int i, a, z, d; 3023 3024 /* 3025 * Check the primary IP. 3026 */ 3027 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3028 return (0); 3029 3030 /* 3031 * All the other IPs are sorted so we can do a binary search. 3032 */ 3033 a = 0; 3034 z = pr->pr_ip6s - 2; 3035 while (a <= z) { 3036 i = (a + z) / 2; 3037 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3038 if (d > 0) 3039 z = i - 1; 3040 else if (d < 0) 3041 a = i + 1; 3042 else 3043 return (0); 3044 } 3045 3046 return (EADDRNOTAVAIL); 3047} 3048 3049int 3050prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3051{ 3052 struct prison *pr; 3053 int error; 3054 3055 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3056 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3057 3058 pr = cred->cr_prison; 3059 if (!(pr->pr_flags & PR_IP6)) 3060 return (0); 3061 mtx_lock(&pr->pr_mtx); 3062 if (!(pr->pr_flags & PR_IP6)) { 3063 mtx_unlock(&pr->pr_mtx); 3064 return (0); 3065 } 3066 if (pr->pr_ip6 == NULL) { 3067 mtx_unlock(&pr->pr_mtx); 3068 return (EAFNOSUPPORT); 3069 } 3070 3071 error = _prison_check_ip6(pr, ia6); 3072 mtx_unlock(&pr->pr_mtx); 3073 return (error); 3074} 3075#endif 3076 3077/* 3078 * Check if a jail supports the given address family. 3079 * 3080 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3081 * if not. 3082 */ 3083int 3084prison_check_af(struct ucred *cred, int af) 3085{ 3086 struct prison *pr; 3087 int error; 3088 3089 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3090 3091 pr = cred->cr_prison; 3092 error = 0; 3093 switch (af) 3094 { 3095#ifdef INET 3096 case AF_INET: 3097 if (pr->pr_flags & PR_IP4) 3098 { 3099 mtx_lock(&pr->pr_mtx); 3100 if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3101 error = EAFNOSUPPORT; 3102 mtx_unlock(&pr->pr_mtx); 3103 } 3104 break; 3105#endif 3106#ifdef INET6 3107 case AF_INET6: 3108 if (pr->pr_flags & PR_IP6) 3109 { 3110 mtx_lock(&pr->pr_mtx); 3111 if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3112 error = EAFNOSUPPORT; 3113 mtx_unlock(&pr->pr_mtx); 3114 } 3115 break; 3116#endif 3117 case AF_LOCAL: 3118 case AF_ROUTE: 3119 break; 3120 default: 3121 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3122 error = EAFNOSUPPORT; 3123 } 3124 return (error); 3125} 3126 3127/* 3128 * Check if given address belongs to the jail referenced by cred (wrapper to 3129 * prison_check_ip[46]). 3130 * 3131 * Returns 0 if jail doesn't restrict the address family or if address belongs 3132 * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3133 * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 3134 */ 3135int 3136prison_if(struct ucred *cred, struct sockaddr *sa) 3137{ 3138#ifdef INET 3139 struct sockaddr_in *sai; 3140#endif 3141#ifdef INET6 3142 struct sockaddr_in6 *sai6; 3143#endif 3144 int error; 3145 3146 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3147 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3148 3149 error = 0; 3150 switch (sa->sa_family) 3151 { 3152#ifdef INET 3153 case AF_INET: 3154 sai = (struct sockaddr_in *)sa; 3155 error = prison_check_ip4(cred, &sai->sin_addr); 3156 break; 3157#endif 3158#ifdef INET6 3159 case AF_INET6: 3160 sai6 = (struct sockaddr_in6 *)sa; 3161 error = prison_check_ip6(cred, &sai6->sin6_addr); 3162 break; 3163#endif 3164 default: 3165 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3166 error = EAFNOSUPPORT; 3167 } 3168 return (error); 3169} 3170 3171/* 3172 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 3173 */ 3174int 3175prison_check(struct ucred *cred1, struct ucred *cred2) 3176{ 3177 3178#ifdef VIMAGE 3179 if (cred2->cr_vimage->v_procg != cred1->cr_vimage->v_procg) 3180 return (ESRCH); 3181#endif 3182 return ((cred1->cr_prison == cred2->cr_prison || 3183 prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3184} 3185 3186/* 3187 * Return 1 if p2 is a child of p1, otherwise 0. 3188 */ 3189int 3190prison_ischild(struct prison *pr1, struct prison *pr2) 3191{ 3192 3193 for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3194 if (pr1 == pr2) 3195 return (1); 3196 return (0); 3197} 3198 3199/* 3200 * Return 1 if the passed credential is in a jail, otherwise 0. 3201 */ 3202int 3203jailed(struct ucred *cred) 3204{ 3205 3206 return (cred->cr_prison != &prison0); 3207} 3208 3209/* 3210 * Return the correct hostname (domainname, et al) for the passed credential. 3211 */ 3212void 3213getcredhostname(struct ucred *cred, char *buf, size_t size) 3214{ 3215 struct prison *pr; 3216 3217 /* 3218 * A NULL credential can be used to shortcut to the physical 3219 * system's hostname. 3220 */ 3221 pr = (cred != NULL) ? cred->cr_prison : &prison0; 3222 mtx_lock(&pr->pr_mtx); 3223 strlcpy(buf, pr->pr_host, size); 3224 mtx_unlock(&pr->pr_mtx); 3225} 3226 3227void 3228getcreddomainname(struct ucred *cred, char *buf, size_t size) 3229{ 3230 3231 mtx_lock(&cred->cr_prison->pr_mtx); 3232 strlcpy(buf, cred->cr_prison->pr_domain, size); 3233 mtx_unlock(&cred->cr_prison->pr_mtx); 3234} 3235 3236void 3237getcredhostuuid(struct ucred *cred, char *buf, size_t size) 3238{ 3239 3240 mtx_lock(&cred->cr_prison->pr_mtx); 3241 strlcpy(buf, cred->cr_prison->pr_uuid, size); 3242 mtx_unlock(&cred->cr_prison->pr_mtx); 3243} 3244 3245void 3246getcredhostid(struct ucred *cred, unsigned long *hostid) 3247{ 3248 3249 mtx_lock(&cred->cr_prison->pr_mtx); 3250 *hostid = cred->cr_prison->pr_hostid; 3251 mtx_unlock(&cred->cr_prison->pr_mtx); 3252} 3253 3254/* 3255 * Determine whether the subject represented by cred can "see" 3256 * status of a mount point. 3257 * Returns: 0 for permitted, ENOENT otherwise. 3258 * XXX: This function should be called cr_canseemount() and should be 3259 * placed in kern_prot.c. 3260 */ 3261int 3262prison_canseemount(struct ucred *cred, struct mount *mp) 3263{ 3264 struct prison *pr; 3265 struct statfs *sp; 3266 size_t len; 3267 3268 pr = cred->cr_prison; 3269 if (pr->pr_enforce_statfs == 0) 3270 return (0); 3271 if (pr->pr_root->v_mount == mp) 3272 return (0); 3273 if (pr->pr_enforce_statfs == 2) 3274 return (ENOENT); 3275 /* 3276 * If jail's chroot directory is set to "/" we should be able to see 3277 * all mount-points from inside a jail. 3278 * This is ugly check, but this is the only situation when jail's 3279 * directory ends with '/'. 3280 */ 3281 if (strcmp(pr->pr_path, "/") == 0) 3282 return (0); 3283 len = strlen(pr->pr_path); 3284 sp = &mp->mnt_stat; 3285 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3286 return (ENOENT); 3287 /* 3288 * Be sure that we don't have situation where jail's root directory 3289 * is "/some/path" and mount point is "/some/pathpath". 3290 */ 3291 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3292 return (ENOENT); 3293 return (0); 3294} 3295 3296void 3297prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3298{ 3299 char jpath[MAXPATHLEN]; 3300 struct prison *pr; 3301 size_t len; 3302 3303 pr = cred->cr_prison; 3304 if (pr->pr_enforce_statfs == 0) 3305 return; 3306 if (prison_canseemount(cred, mp) != 0) { 3307 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3308 strlcpy(sp->f_mntonname, "[restricted]", 3309 sizeof(sp->f_mntonname)); 3310 return; 3311 } 3312 if (pr->pr_root->v_mount == mp) { 3313 /* 3314 * Clear current buffer data, so we are sure nothing from 3315 * the valid path left there. 3316 */ 3317 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3318 *sp->f_mntonname = '/'; 3319 return; 3320 } 3321 /* 3322 * If jail's chroot directory is set to "/" we should be able to see 3323 * all mount-points from inside a jail. 3324 */ 3325 if (strcmp(pr->pr_path, "/") == 0) 3326 return; 3327 len = strlen(pr->pr_path); 3328 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3329 /* 3330 * Clear current buffer data, so we are sure nothing from 3331 * the valid path left there. 3332 */ 3333 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3334 if (*jpath == '\0') { 3335 /* Should never happen. */ 3336 *sp->f_mntonname = '/'; 3337 } else { 3338 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3339 } 3340} 3341 3342/* 3343 * Check with permission for a specific privilege is granted within jail. We 3344 * have a specific list of accepted privileges; the rest are denied. 3345 */ 3346int 3347prison_priv_check(struct ucred *cred, int priv) 3348{ 3349 3350 if (!jailed(cred)) 3351 return (0); 3352 3353 switch (priv) { 3354 3355 /* 3356 * Allow ktrace privileges for root in jail. 3357 */ 3358 case PRIV_KTRACE: 3359 3360#if 0 3361 /* 3362 * Allow jailed processes to configure audit identity and 3363 * submit audit records (login, etc). In the future we may 3364 * want to further refine the relationship between audit and 3365 * jail. 3366 */ 3367 case PRIV_AUDIT_GETAUDIT: 3368 case PRIV_AUDIT_SETAUDIT: 3369 case PRIV_AUDIT_SUBMIT: 3370#endif 3371 3372 /* 3373 * Allow jailed processes to manipulate process UNIX 3374 * credentials in any way they see fit. 3375 */ 3376 case PRIV_CRED_SETUID: 3377 case PRIV_CRED_SETEUID: 3378 case PRIV_CRED_SETGID: 3379 case PRIV_CRED_SETEGID: 3380 case PRIV_CRED_SETGROUPS: 3381 case PRIV_CRED_SETREUID: 3382 case PRIV_CRED_SETREGID: 3383 case PRIV_CRED_SETRESUID: 3384 case PRIV_CRED_SETRESGID: 3385 3386 /* 3387 * Jail implements visibility constraints already, so allow 3388 * jailed root to override uid/gid-based constraints. 3389 */ 3390 case PRIV_SEEOTHERGIDS: 3391 case PRIV_SEEOTHERUIDS: 3392 3393 /* 3394 * Jail implements inter-process debugging limits already, so 3395 * allow jailed root various debugging privileges. 3396 */ 3397 case PRIV_DEBUG_DIFFCRED: 3398 case PRIV_DEBUG_SUGID: 3399 case PRIV_DEBUG_UNPRIV: 3400 3401 /* 3402 * Allow jail to set various resource limits and login 3403 * properties, and for now, exceed process resource limits. 3404 */ 3405 case PRIV_PROC_LIMIT: 3406 case PRIV_PROC_SETLOGIN: 3407 case PRIV_PROC_SETRLIMIT: 3408 3409 /* 3410 * System V and POSIX IPC privileges are granted in jail. 3411 */ 3412 case PRIV_IPC_READ: 3413 case PRIV_IPC_WRITE: 3414 case PRIV_IPC_ADMIN: 3415 case PRIV_IPC_MSGSIZE: 3416 case PRIV_MQ_ADMIN: 3417 3418 /* 3419 * Jail operations within a jail work on child jails. 3420 */ 3421 case PRIV_JAIL_ATTACH: 3422 case PRIV_JAIL_SET: 3423 case PRIV_JAIL_REMOVE: 3424 3425 /* 3426 * Jail implements its own inter-process limits, so allow 3427 * root processes in jail to change scheduling on other 3428 * processes in the same jail. Likewise for signalling. 3429 */ 3430 case PRIV_SCHED_DIFFCRED: 3431 case PRIV_SCHED_CPUSET: 3432 case PRIV_SIGNAL_DIFFCRED: 3433 case PRIV_SIGNAL_SUGID: 3434 3435 /* 3436 * Allow jailed processes to write to sysctls marked as jail 3437 * writable. 3438 */ 3439 case PRIV_SYSCTL_WRITEJAIL: 3440 3441 /* 3442 * Allow root in jail to manage a variety of quota 3443 * properties. These should likely be conditional on a 3444 * configuration option. 3445 */ 3446 case PRIV_VFS_GETQUOTA: 3447 case PRIV_VFS_SETQUOTA: 3448 3449 /* 3450 * Since Jail relies on chroot() to implement file system 3451 * protections, grant many VFS privileges to root in jail. 3452 * Be careful to exclude mount-related and NFS-related 3453 * privileges. 3454 */ 3455 case PRIV_VFS_READ: 3456 case PRIV_VFS_WRITE: 3457 case PRIV_VFS_ADMIN: 3458 case PRIV_VFS_EXEC: 3459 case PRIV_VFS_LOOKUP: 3460 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 3461 case PRIV_VFS_CHFLAGS_DEV: 3462 case PRIV_VFS_CHOWN: 3463 case PRIV_VFS_CHROOT: 3464 case PRIV_VFS_RETAINSUGID: 3465 case PRIV_VFS_FCHROOT: 3466 case PRIV_VFS_LINK: 3467 case PRIV_VFS_SETGID: 3468 case PRIV_VFS_STAT: 3469 case PRIV_VFS_STICKYFILE: 3470 return (0); 3471 3472 /* 3473 * Depending on the global setting, allow privilege of 3474 * setting system flags. 3475 */ 3476 case PRIV_VFS_SYSFLAGS: 3477 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3478 return (0); 3479 else 3480 return (EPERM); 3481 3482 /* 3483 * Depending on the global setting, allow privilege of 3484 * mounting/unmounting file systems. 3485 */ 3486 case PRIV_VFS_MOUNT: 3487 case PRIV_VFS_UNMOUNT: 3488 case PRIV_VFS_MOUNT_NONUSER: 3489 case PRIV_VFS_MOUNT_OWNER: 3490 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT) 3491 return (0); 3492 else 3493 return (EPERM); 3494 3495 /* 3496 * Allow jailed root to bind reserved ports and reuse in-use 3497 * ports. 3498 */ 3499 case PRIV_NETINET_RESERVEDPORT: 3500 case PRIV_NETINET_REUSEPORT: 3501 return (0); 3502 3503 /* 3504 * Allow jailed root to set certian IPv4/6 (option) headers. 3505 */ 3506 case PRIV_NETINET_SETHDROPTS: 3507 return (0); 3508 3509 /* 3510 * Conditionally allow creating raw sockets in jail. 3511 */ 3512 case PRIV_NETINET_RAW: 3513 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 3514 return (0); 3515 else 3516 return (EPERM); 3517 3518 /* 3519 * Since jail implements its own visibility limits on netstat 3520 * sysctls, allow getcred. This allows identd to work in 3521 * jail. 3522 */ 3523 case PRIV_NETINET_GETCRED: 3524 return (0); 3525 3526 default: 3527 /* 3528 * In all remaining cases, deny the privilege request. This 3529 * includes almost all network privileges, many system 3530 * configuration privileges. 3531 */ 3532 return (EPERM); 3533 } 3534} 3535 3536/* 3537 * Return the part of pr2's name that is relative to pr1, or the whole name 3538 * if it does not directly follow. 3539 */ 3540 3541char * 3542prison_name(struct prison *pr1, struct prison *pr2) 3543{ 3544 char *name; 3545 3546 /* Jails see themselves as "0" (if they see themselves at all). */ 3547 if (pr1 == pr2) 3548 return "0"; 3549 name = pr2->pr_name; 3550 if (prison_ischild(pr1, pr2)) { 3551 /* 3552 * pr1 isn't locked (and allprison_lock may not be either) 3553 * so its length can't be counted on. But the number of dots 3554 * can be counted on - and counted. 3555 */ 3556 for (; pr1 != &prison0; pr1 = pr1->pr_parent) 3557 name = strchr(name, '.') + 1; 3558 } 3559 return (name); 3560} 3561 3562/* 3563 * Return the part of pr2's path that is relative to pr1, or the whole path 3564 * if it does not directly follow. 3565 */ 3566static char * 3567prison_path(struct prison *pr1, struct prison *pr2) 3568{ 3569 char *path1, *path2; 3570 int len1; 3571 3572 path1 = pr1->pr_path; 3573 path2 = pr2->pr_path; 3574 if (!strcmp(path1, "/")) 3575 return (path2); 3576 len1 = strlen(path1); 3577 if (strncmp(path1, path2, len1)) 3578 return (path2); 3579 if (path2[len1] == '\0') 3580 return "/"; 3581 if (path2[len1] == '/') 3582 return (path2 + len1); 3583 return (path2); 3584} 3585 3586 3587/* 3588 * Jail-related sysctls. 3589 */ 3590SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 3591 "Jails"); 3592 3593static int 3594sysctl_jail_list(SYSCTL_HANDLER_ARGS) 3595{ 3596 struct xprison *xp; 3597 struct prison *pr, *cpr; 3598#ifdef INET 3599 struct in_addr *ip4 = NULL; 3600 int ip4s = 0; 3601#endif 3602#ifdef INET6 3603 struct in_addr *ip6 = NULL; 3604 int ip6s = 0; 3605#endif 3606 int descend, error; 3607 3608 xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 3609 pr = req->td->td_ucred->cr_prison; 3610 error = 0; 3611 sx_slock(&allprison_lock); 3612 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 3613#if defined(INET) || defined(INET6) 3614 again: 3615#endif 3616 mtx_lock(&cpr->pr_mtx); 3617#ifdef INET 3618 if (cpr->pr_ip4s > 0) { 3619 if (ip4s < cpr->pr_ip4s) { 3620 ip4s = cpr->pr_ip4s; 3621 mtx_unlock(&cpr->pr_mtx); 3622 ip4 = realloc(ip4, ip4s * 3623 sizeof(struct in_addr), M_TEMP, M_WAITOK); 3624 goto again; 3625 } 3626 bcopy(cpr->pr_ip4, ip4, 3627 cpr->pr_ip4s * sizeof(struct in_addr)); 3628 } 3629#endif 3630#ifdef INET6 3631 if (cpr->pr_ip6s > 0) { 3632 if (ip6s < cpr->pr_ip6s) { 3633 ip6s = cpr->pr_ip6s; 3634 mtx_unlock(&cpr->pr_mtx); 3635 ip6 = realloc(ip6, ip6s * 3636 sizeof(struct in6_addr), M_TEMP, M_WAITOK); 3637 goto again; 3638 } 3639 bcopy(cpr->pr_ip6, ip6, 3640 cpr->pr_ip6s * sizeof(struct in6_addr)); 3641 } 3642#endif 3643 if (cpr->pr_ref == 0) { 3644 mtx_unlock(&cpr->pr_mtx); 3645 continue; 3646 } 3647 bzero(xp, sizeof(*xp)); 3648 xp->pr_version = XPRISON_VERSION; 3649 xp->pr_id = cpr->pr_id; 3650 xp->pr_state = cpr->pr_uref > 0 3651 ? PRISON_STATE_ALIVE : PRISON_STATE_DYING; 3652 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); 3653 strlcpy(xp->pr_host, cpr->pr_host, sizeof(xp->pr_host)); 3654 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); 3655#ifdef INET 3656 xp->pr_ip4s = cpr->pr_ip4s; 3657#endif 3658#ifdef INET6 3659 xp->pr_ip6s = cpr->pr_ip6s; 3660#endif 3661 mtx_unlock(&cpr->pr_mtx); 3662 error = SYSCTL_OUT(req, xp, sizeof(*xp)); 3663 if (error) 3664 break; 3665#ifdef INET 3666 if (xp->pr_ip4s > 0) { 3667 error = SYSCTL_OUT(req, ip4, 3668 xp->pr_ip4s * sizeof(struct in_addr)); 3669 if (error) 3670 break; 3671 } 3672#endif 3673#ifdef INET6 3674 if (xp->pr_ip6s > 0) { 3675 error = SYSCTL_OUT(req, ip6, 3676 xp->pr_ip6s * sizeof(struct in6_addr)); 3677 if (error) 3678 break; 3679 } 3680#endif 3681 } 3682 sx_sunlock(&allprison_lock); 3683 free(xp, M_TEMP); 3684#ifdef INET 3685 free(ip4, M_TEMP); 3686#endif 3687#ifdef INET6 3688 free(ip6, M_TEMP); 3689#endif 3690 return (error); 3691} 3692 3693SYSCTL_OID(_security_jail, OID_AUTO, list, 3694 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3695 sysctl_jail_list, "S", "List of active jails"); 3696 3697static int 3698sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 3699{ 3700 int error, injail; 3701 3702 injail = jailed(req->td->td_ucred); 3703 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 3704 3705 return (error); 3706} 3707 3708SYSCTL_PROC(_security_jail, OID_AUTO, jailed, 3709 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3710 sysctl_jail_jailed, "I", "Process in jail?"); 3711 3712#if defined(INET) || defined(INET6) 3713SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 3714 &jail_max_af_ips, 0, 3715 "Number of IP addresses a jail may have at most per address family"); 3716#endif 3717 3718/* 3719 * Default parameters for jail(2) compatability. For historical reasons, 3720 * the sysctl names have varying similarity to the parameter names. Prisons 3721 * just see their own parameters, and can't change them. 3722 */ 3723static int 3724sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 3725{ 3726 struct prison *pr; 3727 int allow, error, i; 3728 3729 pr = req->td->td_ucred->cr_prison; 3730 allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 3731 3732 /* Get the current flag value, and convert it to a boolean. */ 3733 i = (allow & arg2) ? 1 : 0; 3734 if (arg1 != NULL) 3735 i = !i; 3736 error = sysctl_handle_int(oidp, &i, 0, req); 3737 if (error || !req->newptr) 3738 return (error); 3739 i = i ? arg2 : 0; 3740 if (arg1 != NULL) 3741 i ^= arg2; 3742 /* 3743 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 3744 * for writing. 3745 */ 3746 mtx_lock(&prison0.pr_mtx); 3747 jail_default_allow = (jail_default_allow & ~arg2) | i; 3748 mtx_unlock(&prison0.pr_mtx); 3749 return (0); 3750} 3751 3752SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 3753 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3754 NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 3755 "Processes in jail can set their hostnames"); 3756SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 3757 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3758 (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 3759 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 3760SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 3761 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3762 NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 3763 "Processes in jail can use System V IPC primitives"); 3764SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 3765 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3766 NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 3767 "Prison root can create raw sockets"); 3768SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 3769 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3770 NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 3771 "Processes in jail can alter system file flags"); 3772SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 3773 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3774 NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 3775 "Processes in jail can mount/unmount jail-friendly file systems"); 3776 3777static int 3778sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) 3779{ 3780 struct prison *pr; 3781 int level, error; 3782 3783 pr = req->td->td_ucred->cr_prison; 3784 level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2); 3785 error = sysctl_handle_int(oidp, &level, 0, req); 3786 if (error || !req->newptr) 3787 return (error); 3788 *(int *)arg1 = level; 3789 return (0); 3790} 3791 3792SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, 3793 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3794 &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), 3795 sysctl_jail_default_level, "I", 3796 "Processes in jail cannot see all mounted file systems"); 3797 3798/* 3799 * Nodes to describe jail parameters. Maximum length of string parameters 3800 * is returned in the string itself, and the other parameters exist merely 3801 * to make themselves and their types known. 3802 */ 3803SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, 3804 "Jail parameters"); 3805 3806int 3807sysctl_jail_param(SYSCTL_HANDLER_ARGS) 3808{ 3809 int i; 3810 long l; 3811 size_t s; 3812 char numbuf[12]; 3813 3814 switch (oidp->oid_kind & CTLTYPE) 3815 { 3816 case CTLTYPE_LONG: 3817 case CTLTYPE_ULONG: 3818 l = 0; 3819#ifdef SCTL_MASK32 3820 if (!(req->flags & SCTL_MASK32)) 3821#endif 3822 return (SYSCTL_OUT(req, &l, sizeof(l))); 3823 case CTLTYPE_INT: 3824 case CTLTYPE_UINT: 3825 i = 0; 3826 return (SYSCTL_OUT(req, &i, sizeof(i))); 3827 case CTLTYPE_STRING: 3828 snprintf(numbuf, sizeof(numbuf), "%d", arg2); 3829 return 3830 (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); 3831 case CTLTYPE_STRUCT: 3832 s = (size_t)arg2; 3833 return (SYSCTL_OUT(req, &s, sizeof(s))); 3834 } 3835 return (0); 3836} 3837 3838SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); 3839SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); 3840SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); 3841SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); 3842SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, 3843 "I", "Jail secure level"); 3844SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, 3845 "I", "Jail cannot see all mounted file systems"); 3846SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, 3847 "B", "Jail persistence"); 3848SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, 3849 "B", "Jail is in the process of shutting down"); 3850 3851SYSCTL_JAIL_PARAM_NODE(host, "Jail host info"); 3852SYSCTL_JAIL_PARAM(, nohost, CTLTYPE_INT | CTLFLAG_RW, 3853 "BN", "Jail w/ no host info"); 3854SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, 3855 "Jail hostname"); 3856SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, 3857 "Jail NIS domainname"); 3858SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, 3859 "Jail host UUID"); 3860SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, 3861 "LU", "Jail host ID"); 3862 3863SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 3864SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); 3865 3866#ifdef INET 3867SYSCTL_JAIL_PARAM_NODE(ip4, "Jail IPv4 address virtualization"); 3868SYSCTL_JAIL_PARAM(, noip4, CTLTYPE_INT | CTLFLAG_RW, 3869 "BN", "Jail w/ no IP address virtualization"); 3870SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), 3871 "S,in_addr,a", "Jail IPv4 addresses"); 3872#endif 3873#ifdef INET6 3874SYSCTL_JAIL_PARAM_NODE(ip6, "Jail IPv6 address virtualization"); 3875SYSCTL_JAIL_PARAM(, noip6, CTLTYPE_INT | CTLFLAG_RW, 3876 "BN", "Jail w/ no IP address virtualization"); 3877SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), 3878 "S,in6_addr,a", "Jail IPv6 addresses"); 3879#endif 3880 3881SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); 3882SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, 3883 "B", "Jail may set hostname"); 3884SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, 3885 "B", "Jail may use SYSV IPC"); 3886SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, 3887 "B", "Jail may create raw sockets"); 3888SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, 3889 "B", "Jail may alter system file flags"); 3890SYSCTL_JAIL_PARAM(_allow, mount, CTLTYPE_INT | CTLFLAG_RW, 3891 "B", "Jail may mount/unmount jail-friendly file systems"); 3892SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, 3893 "B", "Jail may set file quotas"); 3894SYSCTL_JAIL_PARAM(_allow, jails, CTLTYPE_INT | CTLFLAG_RW, 3895 "B", "Jail may create child jails"); 3896SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, 3897 "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); 3898 3899 3900#ifdef DDB 3901 3902static void 3903db_show_prison(struct prison *pr) 3904{ 3905 int fi; 3906#if defined(INET) || defined(INET6) 3907 int ii; 3908#endif 3909#ifdef INET6 3910 char ip6buf[INET6_ADDRSTRLEN]; 3911#endif 3912 3913 db_printf("prison %p:\n", pr); 3914 db_printf(" jid = %d\n", pr->pr_id); 3915 db_printf(" name = %s\n", pr->pr_name); 3916 db_printf(" parent = %p\n", pr->pr_parent); 3917 db_printf(" ref = %d\n", pr->pr_ref); 3918 db_printf(" uref = %d\n", pr->pr_uref); 3919 db_printf(" path = %s\n", pr->pr_path); 3920 db_printf(" cpuset = %d\n", pr->pr_cpuset 3921 ? pr->pr_cpuset->cs_id : -1); 3922 db_printf(" root = %p\n", pr->pr_root); 3923 db_printf(" securelevel = %d\n", pr->pr_securelevel); 3924 db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); 3925 db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); 3926 db_printf(" flags = %x", pr->pr_flags); 3927 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 3928 fi++) 3929 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) 3930 db_printf(" %s", pr_flag_names[fi]); 3931 db_printf(" allow = %x", pr->pr_allow); 3932 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 3933 fi++) 3934 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) 3935 db_printf(" %s", pr_allow_names[fi]); 3936 db_printf("\n"); 3937 db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); 3938 db_printf(" host.hostname = %s\n", pr->pr_host); 3939 db_printf(" host.domainname = %s\n", pr->pr_domain); 3940 db_printf(" host.hostuuid = %s\n", pr->pr_uuid); 3941 db_printf(" host.hostid = %lu\n", pr->pr_hostid); 3942#ifdef INET 3943 db_printf(" ip4s = %d\n", pr->pr_ip4s); 3944 for (ii = 0; ii < pr->pr_ip4s; ii++) 3945 db_printf(" %s %s\n", 3946 ii == 0 ? "ip4 =" : " ", 3947 inet_ntoa(pr->pr_ip4[ii])); 3948#endif 3949#ifdef INET6 3950 db_printf(" ip6s = %d\n", pr->pr_ip6s); 3951 for (ii = 0; ii < pr->pr_ip6s; ii++) 3952 db_printf(" %s %s\n", 3953 ii == 0 ? "ip6 =" : " ", 3954 ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); 3955#endif 3956} 3957 3958DB_SHOW_COMMAND(prison, db_show_prison_command) 3959{ 3960 struct prison *pr; 3961 3962 if (!have_addr) { 3963 /* 3964 * Show all prisons in the list, and prison0 which is not 3965 * listed. 3966 */ 3967 db_show_prison(&prison0); 3968 if (!db_pager_quit) { 3969 TAILQ_FOREACH(pr, &allprison, pr_list) { 3970 db_show_prison(pr); 3971 if (db_pager_quit) 3972 break; 3973 } 3974 } 3975 return; 3976 } 3977 3978 if (addr == 0) 3979 pr = &prison0; 3980 else { 3981 /* Look for a prison with the ID and with references. */ 3982 TAILQ_FOREACH(pr, &allprison, pr_list) 3983 if (pr->pr_id == addr && pr->pr_ref > 0) 3984 break; 3985 if (pr == NULL) 3986 /* Look again, without requiring a reference. */ 3987 TAILQ_FOREACH(pr, &allprison, pr_list) 3988 if (pr->pr_id == addr) 3989 break; 3990 if (pr == NULL) 3991 /* Assume address points to a valid prison. */ 3992 pr = (struct prison *)addr; 3993 } 3994 db_show_prison(pr); 3995} 3996 3997#endif /* DDB */ 3998