/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/kern/kern_fork.c 304905 2016-08-27 11:45:05Z kib $");

#include "opt_kdtrace.h"
#include "opt_ktrace.h"
#include "opt_kstack_pages.h"
#include "opt_procdesc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procdesc.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
72169689Skan#include <sys/unistd.h> 73169689Skan#include <sys/sdt.h> 74169689Skan#include <sys/sx.h> 75169689Skan#include <sys/sysent.h> 76169689Skan#include <sys/signalvar.h> 77169689Skan 78169689Skan#include <security/audit/audit.h> 79169689Skan#include <security/mac/mac_framework.h> 80169689Skan 81169689Skan#include <vm/vm.h> 82169689Skan#include <vm/pmap.h> 83169689Skan#include <vm/vm_map.h> 84169689Skan#include <vm/vm_extern.h> 85169689Skan#include <vm/uma.h> 86169689Skan 87169689Skan#ifdef KDTRACE_HOOKS 88169689Skan#include <sys/dtrace_bsd.h> 89169689Skandtrace_fork_func_t dtrace_fasttrap_fork; 90169689Skan#endif 91169689Skan 92169689SkanSDT_PROVIDER_DECLARE(proc); 93169689SkanSDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int"); 94169689Skan 95169689Skan#ifndef _SYS_SYSPROTO_H_ 96169689Skanstruct fork_args { 97169689Skan int dummy; 98169689Skan}; 99169689Skan#endif 100169689Skan 101169689Skan/* ARGSUSED */ 102169689Skanint 103169689Skansys_fork(struct thread *td, struct fork_args *uap) 104169689Skan{ 105169689Skan int error; 106169689Skan struct proc *p2; 107169689Skan 108169689Skan error = fork1(td, RFFDG | RFPROC, 0, &p2, NULL, 0); 109169689Skan if (error == 0) { 110169689Skan td->td_retval[0] = p2->p_pid; 111169689Skan td->td_retval[1] = 0; 112169689Skan } 113169689Skan return (error); 114169689Skan} 115169689Skan 116169689Skan/* ARGUSED */ 117169689Skanint 118169689Skansys_pdfork(td, uap) 119169689Skan struct thread *td; 120169689Skan struct pdfork_args *uap; 121169689Skan{ 122169689Skan#ifdef PROCDESC 123169689Skan int error, fd; 124169689Skan struct proc *p2; 125169689Skan 126169689Skan /* 127169689Skan * It is necessary to return fd by reference because 0 is a valid file 128169689Skan * descriptor number, and the child needs to be able to distinguish 129169689Skan * itself from the parent using the return value. 
130169689Skan */ 131169689Skan error = fork1(td, RFFDG | RFPROC | RFPROCDESC, 0, &p2, 132169689Skan &fd, uap->flags); 133169689Skan if (error == 0) { 134169689Skan td->td_retval[0] = p2->p_pid; 135169689Skan td->td_retval[1] = 0; 136169689Skan error = copyout(&fd, uap->fdp, sizeof(fd)); 137169689Skan } 138169689Skan return (error); 139169689Skan#else 140169689Skan return (ENOSYS); 141169689Skan#endif 142169689Skan} 143169689Skan 144169689Skan/* ARGSUSED */ 145169689Skanint 146169689Skansys_vfork(struct thread *td, struct vfork_args *uap) 147169689Skan{ 148169689Skan int error, flags; 149169689Skan struct proc *p2; 150169689Skan 151169689Skan flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; 152169689Skan error = fork1(td, flags, 0, &p2, NULL, 0); 153169689Skan if (error == 0) { 154169689Skan td->td_retval[0] = p2->p_pid; 155169689Skan td->td_retval[1] = 0; 156169689Skan } 157169689Skan return (error); 158169689Skan} 159169689Skan 160169689Skanint 161169689Skansys_rfork(struct thread *td, struct rfork_args *uap) 162169689Skan{ 163169689Skan struct proc *p2; 164169689Skan int error; 165169689Skan 166169689Skan /* Don't allow kernel-only flags. */ 167169689Skan if ((uap->flags & RFKERNELONLY) != 0) 168169689Skan return (EINVAL); 169169689Skan 170169689Skan AUDIT_ARG_FFLAGS(uap->flags); 171169689Skan error = fork1(td, uap->flags, 0, &p2, NULL, 0); 172169689Skan if (error == 0) { 173169689Skan td->td_retval[0] = p2 ? p2->p_pid : 0; 174169689Skan td->td_retval[1] = 0; 175169689Skan } 176169689Skan return (error); 177169689Skan} 178169689Skan 179169689Skanint nprocs = 1; /* process 0 */ 180169689Skanint lastpid = 0; 181169689SkanSYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, 182169689Skan "Last used PID"); 183169689Skan 184169689Skan/* 185169689Skan * Random component to lastpid generation. We mix in a random factor to make 186169689Skan * it a little harder to predict. We sanity check the modulus value to avoid 187169689Skan * doing it in critical paths. 
Don't let it be too small or we pointlessly 188169689Skan * waste randomness entropy, and don't let it be impossibly large. Using a 189169689Skan * modulus that is too big causes a LOT more process table scans and slows 190169689Skan * down fork processing as the pidchecked caching is defeated. 191169689Skan */ 192169689Skanstatic int randompid = 0; 193169689Skan 194169689Skanstatic int 195169689Skansysctl_kern_randompid(SYSCTL_HANDLER_ARGS) 196169689Skan{ 197169689Skan int error, pid; 198169689Skan 199169689Skan error = sysctl_wire_old_buffer(req, sizeof(int)); 200169689Skan if (error != 0) 201169689Skan return(error); 202169689Skan sx_xlock(&allproc_lock); 203169689Skan pid = randompid; 204169689Skan error = sysctl_handle_int(oidp, &pid, 0, req); 205169689Skan if (error == 0 && req->newptr != NULL) { 206169689Skan if (pid < 0 || pid > pid_max - 100) /* out of range */ 207169689Skan pid = pid_max - 100; 208169689Skan else if (pid < 2) /* NOP */ 209169689Skan pid = 0; 210169689Skan else if (pid < 100) /* Make it reasonable */ 211169689Skan pid = 100; 212169689Skan randompid = pid; 213169689Skan } 214169689Skan sx_xunlock(&allproc_lock); 215169689Skan return (error); 216169689Skan} 217169689Skan 218169689SkanSYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 219169689Skan 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); 220169689Skan 221169689Skanstatic int 222169689Skanfork_findpid(int flags) 223169689Skan{ 224169689Skan struct proc *p; 225169689Skan int trypid; 226169689Skan static int pidchecked = 0; 227169689Skan 228169689Skan /* 229169689Skan * Requires allproc_lock in order to iterate over the list 230169689Skan * of processes, and proctree_lock to access p_pgrp. 231169689Skan */ 232169689Skan sx_assert(&allproc_lock, SX_LOCKED); 233169689Skan sx_assert(&proctree_lock, SX_LOCKED); 234169689Skan 235169689Skan /* 236169689Skan * Find an unused process ID. 
We remember a range of unused IDs 237169689Skan * ready to use (from lastpid+1 through pidchecked-1). 238169689Skan * 239169689Skan * If RFHIGHPID is set (used during system boot), do not allocate 240169689Skan * low-numbered pids. 241169689Skan */ 242169689Skan trypid = lastpid + 1; 243169689Skan if (flags & RFHIGHPID) { 244169689Skan if (trypid < 10) 245169689Skan trypid = 10; 246169689Skan } else { 247169689Skan if (randompid) 248169689Skan trypid += arc4random() % randompid; 249169689Skan } 250169689Skanretry: 251169689Skan /* 252169689Skan * If the process ID prototype has wrapped around, 253169689Skan * restart somewhat above 0, as the low-numbered procs 254169689Skan * tend to include daemons that don't exit. 255169689Skan */ 256169689Skan if (trypid >= pid_max) { 257169689Skan trypid = trypid % pid_max; 258169689Skan if (trypid < 100) 259169689Skan trypid += 100; 260169689Skan pidchecked = 0; 261169689Skan } 262169689Skan if (trypid >= pidchecked) { 263169689Skan int doingzomb = 0; 264169689Skan 265169689Skan pidchecked = PID_MAX; 266169689Skan /* 267169689Skan * Scan the active and zombie procs to check whether this pid 268169689Skan * is in use. Remember the lowest pid that's greater 269169689Skan * than trypid, so we can avoid checking for a while. 270169689Skan * 271169689Skan * Avoid reuse of the process group id, session id or 272169689Skan * the reaper subtree id. Note that for process group 273169689Skan * and sessions, the amount of reserved pids is 274169689Skan * limited by process limit. For the subtree ids, the 275169689Skan * id is kept reserved only while there is a 276169689Skan * non-reaped process in the subtree, so amount of 277169689Skan * reserved pids is limited by process limit times 278169689Skan * two. 
279169689Skan */ 280169689Skan p = LIST_FIRST(&allproc); 281169689Skanagain: 282169689Skan for (; p != NULL; p = LIST_NEXT(p, p_list)) { 283169689Skan while (p->p_pid == trypid || 284169689Skan p->p_reapsubtree == trypid || 285169689Skan (p->p_pgrp != NULL && 286169689Skan (p->p_pgrp->pg_id == trypid || 287169689Skan (p->p_session != NULL && 288169689Skan p->p_session->s_sid == trypid)))) { 289169689Skan trypid++; 290169689Skan if (trypid >= pidchecked) 291169689Skan goto retry; 292169689Skan } 293169689Skan if (p->p_pid > trypid && pidchecked > p->p_pid) 294169689Skan pidchecked = p->p_pid; 295169689Skan if (p->p_pgrp != NULL) { 296169689Skan if (p->p_pgrp->pg_id > trypid && 297169689Skan pidchecked > p->p_pgrp->pg_id) 298169689Skan pidchecked = p->p_pgrp->pg_id; 299169689Skan if (p->p_session != NULL && 300169689Skan p->p_session->s_sid > trypid && 301169689Skan pidchecked > p->p_session->s_sid) 302169689Skan pidchecked = p->p_session->s_sid; 303169689Skan } 304169689Skan } 305169689Skan if (!doingzomb) { 306169689Skan doingzomb = 1; 307169689Skan p = LIST_FIRST(&zombproc); 308169689Skan goto again; 309169689Skan } 310169689Skan } 311169689Skan 312169689Skan /* 313169689Skan * RFHIGHPID does not mess with the lastpid counter during boot. 
314169689Skan */ 315169689Skan if (flags & RFHIGHPID) 316169689Skan pidchecked = 0; 317169689Skan else 318169689Skan lastpid = trypid; 319169689Skan 320169689Skan return (trypid); 321169689Skan} 322169689Skan 323169689Skanstatic int 324169689Skanfork_norfproc(struct thread *td, int flags) 325169689Skan{ 326169689Skan int error; 327169689Skan struct proc *p1; 328169689Skan 329169689Skan KASSERT((flags & RFPROC) == 0, 330169689Skan ("fork_norfproc called with RFPROC set")); 331169689Skan p1 = td->td_proc; 332169689Skan 333169689Skan if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && 334169689Skan (flags & (RFCFDG | RFFDG))) { 335169689Skan PROC_LOCK(p1); 336169689Skan if (thread_single(p1, SINGLE_BOUNDARY)) { 337169689Skan PROC_UNLOCK(p1); 338169689Skan return (ERESTART); 339169689Skan } 340169689Skan PROC_UNLOCK(p1); 341169689Skan } 342169689Skan 343169689Skan error = vm_forkproc(td, NULL, NULL, NULL, flags); 344169689Skan if (error) 345169689Skan goto fail; 346169689Skan 347169689Skan /* 348169689Skan * Close all file descriptors. 349169689Skan */ 350169689Skan if (flags & RFCFDG) { 351169689Skan struct filedesc *fdtmp; 352169689Skan fdtmp = fdinit(td->td_proc->p_fd); 353169689Skan fdescfree(td); 354169689Skan p1->p_fd = fdtmp; 355169689Skan } 356169689Skan 357169689Skan /* 358169689Skan * Unshare file descriptors (from parent). 
359169689Skan */ 360169689Skan if (flags & RFFDG) 361169689Skan fdunshare(td); 362169689Skan 363169689Skanfail: 364169689Skan if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && 365169689Skan (flags & (RFCFDG | RFFDG))) { 366169689Skan PROC_LOCK(p1); 367169689Skan thread_single_end(p1, SINGLE_BOUNDARY); 368169689Skan PROC_UNLOCK(p1); 369169689Skan } 370169689Skan return (error); 371169689Skan} 372169689Skan 373169689Skanstatic void 374169689Skando_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, 375169689Skan struct vmspace *vm2, int pdflags) 376169689Skan{ 377169689Skan struct proc *p1, *pptr; 378169689Skan int p2_held, trypid; 379169689Skan struct filedesc *fd; 380169689Skan struct filedesc_to_leader *fdtol; 381169689Skan struct sigacts *newsigacts; 382169689Skan 383169689Skan sx_assert(&proctree_lock, SX_SLOCKED); 384169689Skan sx_assert(&allproc_lock, SX_XLOCKED); 385169689Skan 386169689Skan p2_held = 0; 387169689Skan p1 = td->td_proc; 388169689Skan 389169689Skan trypid = fork_findpid(flags); 390169689Skan 391169689Skan sx_sunlock(&proctree_lock); 392169689Skan 393169689Skan p2->p_state = PRS_NEW; /* protect against others */ 394169689Skan p2->p_pid = trypid; 395169689Skan AUDIT_ARG_PID(p2->p_pid); 396169689Skan LIST_INSERT_HEAD(&allproc, p2, p_list); 397169689Skan allproc_gen++; 398169689Skan LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); 399169689Skan tidhash_add(td2); 400169689Skan PROC_LOCK(p2); 401169689Skan PROC_LOCK(p1); 402169689Skan 403169689Skan sx_xunlock(&allproc_lock); 404169689Skan 405169689Skan bcopy(&p1->p_startcopy, &p2->p_startcopy, 406169689Skan __rangeof(struct proc, p_startcopy, p_endcopy)); 407169689Skan pargs_hold(p2->p_args); 408169689Skan PROC_UNLOCK(p1); 409169689Skan 410169689Skan bzero(&p2->p_startzero, 411169689Skan __rangeof(struct proc, p_startzero, p_endzero)); 412169689Skan p2->p_treeflag = 0; 413169689Skan p2->p_filemon = NULL; 414169689Skan p2->p_ptevents = 0; 415169689Skan 416169689Skan 
/* Tell the prison that we exist. */ 417169689Skan prison_proc_hold(p2->p_ucred->cr_prison); 418169689Skan 419169689Skan PROC_UNLOCK(p2); 420169689Skan 421169689Skan /* 422169689Skan * Malloc things while we don't hold any locks. 423169689Skan */ 424169689Skan if (flags & RFSIGSHARE) 425169689Skan newsigacts = NULL; 426169689Skan else 427169689Skan newsigacts = sigacts_alloc(); 428169689Skan 429169689Skan /* 430169689Skan * Copy filedesc. 431169689Skan */ 432169689Skan if (flags & RFCFDG) { 433169689Skan fd = fdinit(p1->p_fd); 434169689Skan fdtol = NULL; 435169689Skan } else if (flags & RFFDG) { 436169689Skan fd = fdcopy(p1->p_fd); 437169689Skan fdtol = NULL; 438169689Skan } else { 439169689Skan fd = fdshare(p1->p_fd); 440169689Skan if (p1->p_fdtol == NULL) 441169689Skan p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, 442169689Skan p1->p_leader); 443169689Skan if ((flags & RFTHREAD) != 0) { 444169689Skan /* 445169689Skan * Shared file descriptor table, and shared 446169689Skan * process leaders. 447169689Skan */ 448169689Skan fdtol = p1->p_fdtol; 449169689Skan FILEDESC_XLOCK(p1->p_fd); 450169689Skan fdtol->fdl_refcount++; 451169689Skan FILEDESC_XUNLOCK(p1->p_fd); 452169689Skan } else { 453169689Skan /* 454169689Skan * Shared file descriptor table, and different 455169689Skan * process leaders. 456169689Skan */ 457169689Skan fdtol = filedesc_to_leader_alloc(p1->p_fdtol, 458169689Skan p1->p_fd, p2); 459169689Skan } 460169689Skan } 461169689Skan /* 462169689Skan * Make a proc table entry for the new process. 463169689Skan * Start by zeroing the section of proc that is zero-initialized, 464169689Skan * then copy the section that is copied directly from the parent. 
465169689Skan */ 466169689Skan 467169689Skan PROC_LOCK(p2); 468169689Skan PROC_LOCK(p1); 469169689Skan 470169689Skan bzero(&td2->td_startzero, 471169689Skan __rangeof(struct thread, td_startzero, td_endzero)); 472169689Skan td2->td_su = NULL; 473169689Skan td2->td_sleeptimo = 0; 474169689Skan 475169689Skan bcopy(&td->td_startcopy, &td2->td_startcopy, 476169689Skan __rangeof(struct thread, td_startcopy, td_endcopy)); 477169689Skan 478169689Skan bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); 479169689Skan td2->td_sigstk = td->td_sigstk; 480169689Skan td2->td_flags = TDF_INMEM; 481169689Skan td2->td_lend_user_pri = PRI_MAX; 482169689Skan td2->td_dbg_sc_code = td->td_dbg_sc_code; 483169689Skan td2->td_dbg_sc_narg = td->td_dbg_sc_narg; 484169689Skan 485169689Skan#ifdef VIMAGE 486169689Skan td2->td_vnet = NULL; 487169689Skan td2->td_vnet_lpush = NULL; 488169689Skan#endif 489169689Skan 490169689Skan /* 491169689Skan * Allow the scheduler to initialize the child. 492169689Skan */ 493169689Skan thread_lock(td); 494169689Skan sched_fork(td, td2); 495169689Skan thread_unlock(td); 496169689Skan 497169689Skan /* 498169689Skan * Duplicate sub-structures as needed. 499169689Skan * Increase reference counts on shared objects. 
500169689Skan */ 501169689Skan p2->p_flag = P_INMEM; 502169689Skan p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC); 503169689Skan p2->p_swtick = ticks; 504169689Skan if (p1->p_flag & P_PROFIL) 505169689Skan startprofclock(p2); 506169689Skan td2->td_ucred = crhold(p2->p_ucred); 507169689Skan 508169689Skan if (flags & RFSIGSHARE) { 509169689Skan p2->p_sigacts = sigacts_hold(p1->p_sigacts); 510169689Skan } else { 511169689Skan sigacts_copy(newsigacts, p1->p_sigacts); 512169689Skan p2->p_sigacts = newsigacts; 513169689Skan } 514169689Skan 515169689Skan if (flags & RFTSIGZMB) 516169689Skan p2->p_sigparent = RFTSIGNUM(flags); 517169689Skan else if (flags & RFLINUXTHPN) 518169689Skan p2->p_sigparent = SIGUSR1; 519169689Skan else 520169689Skan p2->p_sigparent = SIGCHLD; 521169689Skan 522169689Skan p2->p_textvp = p1->p_textvp; 523169689Skan p2->p_fd = fd; 524169689Skan p2->p_fdtol = fdtol; 525169689Skan 526169689Skan if (p1->p_flag2 & P2_INHERIT_PROTECTED) { 527169689Skan p2->p_flag |= P_PROTECTED; 528169689Skan p2->p_flag2 |= P2_INHERIT_PROTECTED; 529169689Skan } 530169689Skan 531169689Skan /* 532169689Skan * p_limit is copy-on-write. Bump its refcount. 533169689Skan */ 534169689Skan lim_fork(p1, p2); 535169689Skan 536169689Skan pstats_fork(p1->p_stats, p2->p_stats); 537169689Skan 538169689Skan PROC_UNLOCK(p1); 539169689Skan PROC_UNLOCK(p2); 540169689Skan 541169689Skan /* Bump references to the text vnode (for procfs). */ 542169689Skan if (p2->p_textvp) 543169689Skan vref(p2->p_textvp); 544169689Skan 545169689Skan /* 546169689Skan * Set up linkage for kernel based threading. 
547169689Skan */ 548169689Skan if ((flags & RFTHREAD) != 0) { 549169689Skan mtx_lock(&ppeers_lock); 550169689Skan p2->p_peers = p1->p_peers; 551169689Skan p1->p_peers = p2; 552169689Skan p2->p_leader = p1->p_leader; 553169689Skan mtx_unlock(&ppeers_lock); 554169689Skan PROC_LOCK(p1->p_leader); 555169689Skan if ((p1->p_leader->p_flag & P_WEXIT) != 0) { 556169689Skan PROC_UNLOCK(p1->p_leader); 557169689Skan /* 558169689Skan * The task leader is exiting, so process p1 is 559169689Skan * going to be killed shortly. Since p1 obviously 560169689Skan * isn't dead yet, we know that the leader is either 561169689Skan * sending SIGKILL's to all the processes in this 562169689Skan * task or is sleeping waiting for all the peers to 563169689Skan * exit. We let p1 complete the fork, but we need 564169689Skan * to go ahead and kill the new process p2 since 565169689Skan * the task leader may not get a chance to send 566169689Skan * SIGKILL to it. We leave it on the list so that 567169689Skan * the task leader will wait for this new process 568169689Skan * to commit suicide. 569169689Skan */ 570169689Skan PROC_LOCK(p2); 571169689Skan kern_psignal(p2, SIGKILL); 572169689Skan PROC_UNLOCK(p2); 573169689Skan } else 574169689Skan PROC_UNLOCK(p1->p_leader); 575169689Skan } else { 576169689Skan p2->p_peers = NULL; 577169689Skan p2->p_leader = p2; 578169689Skan } 579169689Skan 580169689Skan sx_xlock(&proctree_lock); 581169689Skan PGRP_LOCK(p1->p_pgrp); 582169689Skan PROC_LOCK(p2); 583169689Skan PROC_LOCK(p1); 584169689Skan 585169689Skan /* 586169689Skan * Preserve some more flags in subprocess. P_PROFIL has already 587169689Skan * been preserved. 
588169689Skan */ 589169689Skan p2->p_flag |= p1->p_flag & P_SUGID; 590169689Skan td2->td_pflags |= td->td_pflags & TDP_ALTSTACK; 591169689Skan SESS_LOCK(p1->p_session); 592169689Skan if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) 593169689Skan p2->p_flag |= P_CONTROLT; 594169689Skan SESS_UNLOCK(p1->p_session); 595169689Skan if (flags & RFPPWAIT) 596169689Skan p2->p_flag |= P_PPWAIT; 597169689Skan 598169689Skan p2->p_pgrp = p1->p_pgrp; 599169689Skan LIST_INSERT_AFTER(p1, p2, p_pglist); 600169689Skan PGRP_UNLOCK(p1->p_pgrp); 601169689Skan LIST_INIT(&p2->p_children); 602169689Skan LIST_INIT(&p2->p_orphans); 603169689Skan 604169689Skan callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0); 605169689Skan 606169689Skan /* 607169689Skan * If PF_FORK is set, the child process inherits the 608169689Skan * procfs ioctl flags from its parent. 609169689Skan */ 610169689Skan if (p1->p_pfsflags & PF_FORK) { 611169689Skan p2->p_stops = p1->p_stops; 612169689Skan p2->p_pfsflags = p1->p_pfsflags; 613169689Skan } 614169689Skan 615169689Skan /* 616169689Skan * This begins the section where we must prevent the parent 617169689Skan * from being swapped. 618169689Skan */ 619169689Skan _PHOLD(p1); 620169689Skan PROC_UNLOCK(p1); 621169689Skan 622169689Skan /* 623169689Skan * Attach the new process to its parent. 624169689Skan * 625169689Skan * If RFNOWAIT is set, the newly created process becomes a child 626169689Skan * of init. This effectively disassociates the child from the 627169689Skan * parent. 628169689Skan */ 629169689Skan if ((flags & RFNOWAIT) != 0) { 630169689Skan pptr = p1->p_reaper; 631169689Skan p2->p_reaper = pptr; 632169689Skan } else { 633169689Skan p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ? 
634169689Skan p1 : p1->p_reaper; 635169689Skan pptr = p1; 636169689Skan } 637169689Skan p2->p_pptr = pptr; 638169689Skan LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); 639169689Skan LIST_INIT(&p2->p_reaplist); 640169689Skan LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling); 641169689Skan if (p2->p_reaper == p1) 642169689Skan p2->p_reapsubtree = p2->p_pid; 643169689Skan else 644169689Skan p2->p_reapsubtree = p1->p_reapsubtree; 645169689Skan sx_xunlock(&proctree_lock); 646169689Skan 647169689Skan /* Inform accounting that we have forked. */ 648169689Skan p2->p_acflag = AFORK; 649169689Skan PROC_UNLOCK(p2); 650169689Skan 651169689Skan#ifdef KTRACE 652169689Skan ktrprocfork(p1, p2); 653169689Skan#endif 654169689Skan 655169689Skan /* 656169689Skan * Finish creating the child process. It will return via a different 657169689Skan * execution path later. (ie: directly into user mode) 658169689Skan */ 659169689Skan vm_forkproc(td, p2, td2, vm2, flags); 660169689Skan 661169689Skan if (flags == (RFFDG | RFPROC)) { 662169689Skan PCPU_INC(cnt.v_forks); 663169689Skan PCPU_ADD(cnt.v_forkpages, p2->p_vmspace->vm_dsize + 664169689Skan p2->p_vmspace->vm_ssize); 665169689Skan } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { 666169689Skan PCPU_INC(cnt.v_vforks); 667169689Skan PCPU_ADD(cnt.v_vforkpages, p2->p_vmspace->vm_dsize + 668169689Skan p2->p_vmspace->vm_ssize); 669169689Skan } else if (p1 == &proc0) { 670169689Skan PCPU_INC(cnt.v_kthreads); 671169689Skan PCPU_ADD(cnt.v_kthreadpages, p2->p_vmspace->vm_dsize + 672169689Skan p2->p_vmspace->vm_ssize); 673169689Skan } else { 674169689Skan PCPU_INC(cnt.v_rforks); 675169689Skan PCPU_ADD(cnt.v_rforkpages, p2->p_vmspace->vm_dsize + 676169689Skan p2->p_vmspace->vm_ssize); 677169689Skan } 678169689Skan 679169689Skan#ifdef PROCDESC 680169689Skan /* 681169689Skan * Associate the process descriptor with the process before anything 682169689Skan * can happen that might cause that process to need the descriptor. 
683169689Skan * However, don't do this until after fork(2) can no longer fail. 684169689Skan */ 685169689Skan if (flags & RFPROCDESC) 686169689Skan procdesc_new(p2, pdflags); 687169689Skan#endif 688169689Skan 689169689Skan /* 690169689Skan * Both processes are set up, now check if any loadable modules want 691169689Skan * to adjust anything. 692169689Skan */ 693169689Skan EVENTHANDLER_INVOKE(process_fork, p1, p2, flags); 694169689Skan 695169689Skan /* 696169689Skan * Set the child start time and mark the process as being complete. 697169689Skan */ 698169689Skan PROC_LOCK(p2); 699169689Skan PROC_LOCK(p1); 700169689Skan microuptime(&p2->p_stats->p_start); 701169689Skan PROC_SLOCK(p2); 702169689Skan p2->p_state = PRS_NORMAL; 703169689Skan PROC_SUNLOCK(p2); 704169689Skan 705169689Skan#ifdef KDTRACE_HOOKS 706169689Skan /* 707169689Skan * Tell the DTrace fasttrap provider about the new process so that any 708169689Skan * tracepoints inherited from the parent can be removed. We have to do 709169689Skan * this only after p_state is PRS_NORMAL since the fasttrap module will 710169689Skan * use pfind() later on. 711169689Skan */ 712169689Skan if ((flags & RFMEM) == 0 && dtrace_fasttrap_fork) 713169689Skan dtrace_fasttrap_fork(p1, p2); 714169689Skan#endif 715169689Skan if (p1->p_ptevents & PTRACE_FORK) { 716169689Skan /* 717169689Skan * Arrange for debugger to receive the fork event. 718169689Skan * 719169689Skan * We can report PL_FLAG_FORKED regardless of 720169689Skan * P_FOLLOWFORK settings, but it does not make a sense 721169689Skan * for runaway child. 
722169689Skan */ 723169689Skan td->td_dbgflags |= TDB_FORK; 724169689Skan td->td_dbg_forked = p2->p_pid; 725169689Skan td2->td_dbgflags |= TDB_STOPATFORK; 726169689Skan _PHOLD(p2); 727169689Skan p2_held = 1; 728169689Skan } 729169689Skan if (flags & RFPPWAIT) { 730169689Skan td->td_pflags |= TDP_RFPPWAIT; 731169689Skan td->td_rfppwait_p = p2; 732169689Skan td->td_dbgflags |= TDB_VFORK; 733169689Skan } 734169689Skan PROC_UNLOCK(p2); 735169689Skan if ((flags & RFSTOPPED) == 0) { 736169689Skan /* 737169689Skan * If RFSTOPPED not requested, make child runnable and 738169689Skan * add to run queue. 739169689Skan */ 740169689Skan thread_lock(td2); 741169689Skan TD_SET_CAN_RUN(td2); 742169689Skan sched_add(td2, SRQ_BORING); 743169689Skan thread_unlock(td2); 744169689Skan } 745169689Skan 746169689Skan /* 747169689Skan * Now can be swapped. 748169689Skan */ 749169689Skan _PRELE(p1); 750169689Skan PROC_UNLOCK(p1); 751169689Skan 752169689Skan /* 753169689Skan * Tell any interested parties about the new process. 754169689Skan */ 755169689Skan knote_fork(&p1->p_klist, p2->p_pid); 756169689Skan SDT_PROBE3(proc, , , create, p2, p1, flags); 757169689Skan 758169689Skan /* 759169689Skan * Wait until debugger is attached to child. 
760169689Skan */ 761169689Skan PROC_LOCK(p2); 762169689Skan while ((td2->td_dbgflags & TDB_STOPATFORK) != 0) 763169689Skan cv_wait(&p2->p_dbgwait, &p2->p_mtx); 764169689Skan if (p2_held) 765169689Skan _PRELE(p2); 766169689Skan PROC_UNLOCK(p2); 767169689Skan} 768169689Skan 769169689Skanint 770169689Skanfork1(struct thread *td, int flags, int pages, struct proc **procp, 771169689Skan int *procdescp, int pdflags) 772169689Skan{ 773169689Skan struct proc *p1, *newproc; 774169689Skan struct thread *td2; 775169689Skan struct vmspace *vm2; 776169689Skan#ifdef PROCDESC 777169689Skan struct file *fp_procdesc; 778169689Skan#endif 779169689Skan vm_ooffset_t mem_charged; 780169689Skan int error, nprocs_new, ok; 781169689Skan static int curfail; 782169689Skan static struct timeval lastfail; 783169689Skan 784169689Skan /* Check for the undefined or unimplemented flags. */ 785169689Skan if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) 786169689Skan return (EINVAL); 787169689Skan 788169689Skan /* Signal value requires RFTSIGZMB. */ 789169689Skan if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) 790169689Skan return (EINVAL); 791169689Skan 792169689Skan /* Can't copy and clear. */ 793169689Skan if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) 794169689Skan return (EINVAL); 795169689Skan 796169689Skan /* Check the validity of the signal number. */ 797169689Skan if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) 798169689Skan return (EINVAL); 799169689Skan 800169689Skan#ifdef PROCDESC 801169689Skan if ((flags & RFPROCDESC) != 0) { 802169689Skan /* Can't not create a process yet get a process descriptor. */ 803169689Skan if ((flags & RFPROC) == 0) 804169689Skan return (EINVAL); 805169689Skan 806169689Skan /* Must provide a place to put a procdesc if creating one. 
*/ 807169689Skan if (procdescp == NULL) 808169689Skan return (EINVAL); 809169689Skan } 810169689Skan#endif 811169689Skan 812169689Skan p1 = td->td_proc; 813169689Skan 814169689Skan /* 815169689Skan * Here we don't create a new process, but we divorce 816169689Skan * certain parts of a process from itself. 817169689Skan */ 818169689Skan if ((flags & RFPROC) == 0) { 819169689Skan *procp = NULL; 820169689Skan return (fork_norfproc(td, flags)); 821169689Skan } 822169689Skan 823169689Skan#ifdef PROCDESC 824169689Skan fp_procdesc = NULL; 825169689Skan#endif 826169689Skan newproc = NULL; 827169689Skan vm2 = NULL; 828169689Skan 829169689Skan /* 830169689Skan * Increment the nprocs resource before allocations occur. 831169689Skan * Although process entries are dynamically created, we still 832169689Skan * keep a global limit on the maximum number we will 833169689Skan * create. There are hard-limits as to the number of processes 834169689Skan * that can run, established by the KVA and memory usage for 835169689Skan * the process data. 836169689Skan * 837169689Skan * Don't allow a nonprivileged user to use the last ten 838169689Skan * processes; don't let root exceed the limit. 
839169689Skan */ 840169689Skan nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1; 841169689Skan if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred, 842169689Skan PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) { 843169689Skan sx_xlock(&allproc_lock); 844169689Skan if (ppsratecheck(&lastfail, &curfail, 1)) { 845169689Skan printf("maxproc limit exceeded by uid %u (pid %d); " 846169689Skan "see tuning(7) and login.conf(5)\n", 847169689Skan td->td_ucred->cr_ruid, p1->p_pid); 848169689Skan } 849169689Skan sx_xunlock(&allproc_lock); 850169689Skan error = EAGAIN; 851169689Skan goto fail1; 852169689Skan } 853169689Skan 854169689Skan#ifdef PROCDESC 855169689Skan /* 856169689Skan * If required, create a process descriptor in the parent first; we 857169689Skan * will abandon it if something goes wrong. We don't finit() until 858169689Skan * later. 859169689Skan */ 860169689Skan if (flags & RFPROCDESC) { 861169689Skan error = falloc(td, &fp_procdesc, procdescp, 0); 862169689Skan if (error != 0) 863169689Skan goto fail1; 864169689Skan } 865169689Skan#endif 866169689Skan 867169689Skan mem_charged = 0; 868169689Skan if (pages == 0) 869169689Skan pages = KSTACK_PAGES; 870169689Skan /* Allocate new proc. 
*/ 871169689Skan newproc = uma_zalloc(proc_zone, M_WAITOK); 872169689Skan td2 = FIRST_THREAD_IN_PROC(newproc); 873169689Skan if (td2 == NULL) { 874169689Skan td2 = thread_alloc(pages); 875169689Skan if (td2 == NULL) { 876169689Skan error = ENOMEM; 877169689Skan goto fail2; 878169689Skan } 879169689Skan proc_linkup(newproc, td2); 880169689Skan } else { 881169689Skan if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) { 882169689Skan if (td2->td_kstack != 0) 883169689Skan vm_thread_dispose(td2); 884169689Skan if (!thread_alloc_stack(td2, pages)) { 885169689Skan error = ENOMEM; 886169689Skan goto fail2; 887169689Skan } 888169689Skan } 889169689Skan } 890169689Skan 891169689Skan if ((flags & RFMEM) == 0) { 892169689Skan vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); 893169689Skan if (vm2 == NULL) { 894169689Skan error = ENOMEM; 895169689Skan goto fail2; 896169689Skan } 897169689Skan if (!swap_reserve(mem_charged)) { 898169689Skan /* 899169689Skan * The swap reservation failed. The accounting 900169689Skan * from the entries of the copied vm2 will be 901169689Skan * subtracted in vmspace_free(), so force the 902169689Skan * reservation there. 903169689Skan */ 904169689Skan swap_reserve_force(mem_charged); 905169689Skan error = ENOMEM; 906169689Skan goto fail2; 907169689Skan } 908169689Skan } else 909169689Skan vm2 = NULL; 910169689Skan 911169689Skan /* 912169689Skan * XXX: This is ugly; when we copy resource usage, we need to bump 913169689Skan * per-cred resource counters. 914169689Skan */ 915169689Skan proc_set_cred_init(newproc, crhold(td->td_ucred)); 916169689Skan 917169689Skan /* 918169689Skan * Initialize resource accounting for the child process. 
919169689Skan */ 920169689Skan error = racct_proc_fork(p1, newproc); 921169689Skan if (error != 0) { 922169689Skan error = EAGAIN; 923169689Skan goto fail1; 924169689Skan } 925169689Skan 926169689Skan#ifdef MAC 927169689Skan mac_proc_init(newproc); 928169689Skan#endif 929169689Skan knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx); 930169689Skan STAILQ_INIT(&newproc->p_ktr); 931169689Skan 932169689Skan /* We have to lock the process tree while we look for a pid. */ 933169689Skan sx_slock(&proctree_lock); 934169689Skan sx_xlock(&allproc_lock); 935169689Skan 936169689Skan /* 937169689Skan * Increment the count of procs running with this uid. Don't allow 938169689Skan * a nonprivileged user to exceed their current limit. 939169689Skan * 940169689Skan * XXXRW: Can we avoid privilege here if it's not needed? 941169689Skan */ 942169689Skan error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0); 943169689Skan if (error == 0) 944169689Skan ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0); 945169689Skan else { 946169689Skan PROC_LOCK(p1); 947169689Skan ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 948169689Skan lim_cur(p1, RLIMIT_NPROC)); 949169689Skan PROC_UNLOCK(p1); 950169689Skan } 951169689Skan if (ok) { 952169689Skan do_fork(td, flags, newproc, td2, vm2, pdflags); 953169689Skan 954169689Skan /* 955169689Skan * Return child proc pointer to parent. 
956169689Skan */ 957169689Skan *procp = newproc; 958169689Skan#ifdef PROCDESC 959169689Skan if (flags & RFPROCDESC) { 960169689Skan procdesc_finit(newproc->p_procdesc, fp_procdesc); 961169689Skan fdrop(fp_procdesc, td); 962169689Skan } 963169689Skan#endif 964169689Skan racct_proc_fork_done(newproc); 965169689Skan return (0); 966169689Skan } 967169689Skan 968169689Skan error = EAGAIN; 969169689Skan sx_sunlock(&proctree_lock); 970169689Skan sx_xunlock(&allproc_lock); 971169689Skan#ifdef MAC 972169689Skan mac_proc_destroy(newproc); 973169689Skan#endif 974169689Skan racct_proc_exit(newproc); 975169689Skanfail1: 976169689Skan crfree(newproc->p_ucred); 977169689Skan newproc->p_ucred = NULL; 978169689Skanfail2: 979169689Skan if (vm2 != NULL) 980169689Skan vmspace_free(vm2); 981169689Skan uma_zfree(proc_zone, newproc); 982169689Skan#ifdef PROCDESC 983169689Skan if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) { 984169689Skan fdclose(td->td_proc->p_fd, fp_procdesc, *procdescp, td); 985169689Skan fdrop(fp_procdesc, td); 986169689Skan } 987169689Skan#endif 988169689Skan atomic_add_int(&nprocs, -1); 989169689Skan pause("fork", hz / 2); 990169689Skan return (error); 991169689Skan} 992169689Skan 993169689Skan/* 994169689Skan * Handle the return of a child process from fork1(). This function 995169689Skan * is called from the MD fork_trampoline() entry point. 
996169689Skan */ 997169689Skanvoid 998169689Skanfork_exit(void (*callout)(void *, struct trapframe *), void *arg, 999169689Skan struct trapframe *frame) 1000169689Skan{ 1001169689Skan struct proc *p; 1002169689Skan struct thread *td; 1003169689Skan struct thread *dtd; 1004169689Skan 1005169689Skan td = curthread; 1006169689Skan p = td->td_proc; 1007169689Skan KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); 1008169689Skan 1009169689Skan CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)", 1010169689Skan td, td->td_sched, p->p_pid, td->td_name); 1011169689Skan 1012169689Skan sched_fork_exit(td); 1013169689Skan /* 1014169689Skan * Processes normally resume in mi_switch() after being 1015169689Skan * cpu_switch()'ed to, but when children start up they arrive here 1016169689Skan * instead, so we must do much the same things as mi_switch() would. 1017169689Skan */ 1018169689Skan if ((dtd = PCPU_GET(deadthread))) { 1019169689Skan PCPU_SET(deadthread, NULL); 1020169689Skan thread_stash(dtd); 1021169689Skan } 1022169689Skan thread_unlock(td); 1023169689Skan 1024169689Skan /* 1025169689Skan * cpu_set_fork_handler intercepts this function call to 1026169689Skan * have this call a non-return function to stay in kernel mode. 1027169689Skan * initproc has its own fork handler, but it does return. 1028169689Skan */ 1029169689Skan KASSERT(callout != NULL, ("NULL callout in fork_exit")); 1030169689Skan callout(arg, frame); 1031169689Skan 1032169689Skan /* 1033169689Skan * Check if a kernel thread misbehaved and returned from its main 1034169689Skan * function. 
1035169689Skan */ 1036169689Skan if (p->p_flag & P_KTHREAD) { 1037169689Skan printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", 1038169689Skan td->td_name, p->p_pid); 1039169689Skan kthread_exit(); 1040169689Skan } 1041169689Skan mtx_assert(&Giant, MA_NOTOWNED); 1042169689Skan 1043169689Skan if (p->p_sysent->sv_schedtail != NULL) 1044169689Skan (p->p_sysent->sv_schedtail)(td); 1045169689Skan} 1046169689Skan 1047169689Skan/* 1048169689Skan * Simplified back end of syscall(), used when returning from fork() 1049169689Skan * directly into user mode. This function is passed in to fork_exit() 1050169689Skan * as the first parameter and is called when returning to a new 1051169689Skan * userland process. 1052169689Skan */ 1053169689Skanvoid 1054169689Skanfork_return(struct thread *td, struct trapframe *frame) 1055169689Skan{ 1056169689Skan struct proc *p, *dbg; 1057169689Skan 1058169689Skan p = td->td_proc; 1059169689Skan if (td->td_dbgflags & TDB_STOPATFORK) { 1060169689Skan sx_xlock(&proctree_lock); 1061169689Skan PROC_LOCK(p); 1062169689Skan if (p->p_pptr->p_ptevents & PTRACE_FORK) { 1063169689Skan /* 1064169689Skan * If debugger still wants auto-attach for the 1065169689Skan * parent's children, do it now. 1066169689Skan */ 1067169689Skan dbg = p->p_pptr->p_pptr; 1068169689Skan proc_set_traced(p, true); 1069169689Skan CTR2(KTR_PTRACE, 1070169689Skan "fork_return: attaching to new child pid %d: oppid %d", 1071169689Skan p->p_pid, p->p_oppid); 1072169689Skan proc_reparent(p, dbg); 1073169689Skan sx_xunlock(&proctree_lock); 1074169689Skan td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP; 1075169689Skan ptracestop(td, SIGSTOP); 1076169689Skan td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); 1077169689Skan } else { 1078169689Skan /* 1079169689Skan * ... otherwise clear the request. 
1080169689Skan */ 1081169689Skan sx_xunlock(&proctree_lock); 1082169689Skan td->td_dbgflags &= ~TDB_STOPATFORK; 1083169689Skan cv_broadcast(&p->p_dbgwait); 1084169689Skan } 1085169689Skan PROC_UNLOCK(p); 1086169689Skan } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) { 1087169689Skan /* 1088169689Skan * This is the start of a new thread in a traced 1089169689Skan * process. Report a system call exit event. 1090169689Skan */ 1091169689Skan PROC_LOCK(p); 1092169689Skan td->td_dbgflags |= TDB_SCX; 1093169689Skan _STOPEVENT(p, S_SCX, td->td_dbg_sc_code); 1094169689Skan if ((p->p_ptevents & PTRACE_SCX) != 0 || 1095169689Skan (td->td_dbgflags & TDB_BORN) != 0) 1096169689Skan ptracestop(td, SIGTRAP); 1097169689Skan td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); 1098169689Skan PROC_UNLOCK(p); 1099169689Skan } 1100169689Skan 1101169689Skan userret(td, frame); 1102169689Skan 1103169689Skan#ifdef KTRACE 1104169689Skan if (KTRPOINT(td, KTR_SYSRET)) 1105169689Skan ktrsysret(SYS_fork, 0, 0); 1106169689Skan#endif 1107169689Skan} 1108169689Skan