linux_fork.c revision 293528
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_fork.c 293528 2016-01-09 16:11:09Z dchagin $");
31
32#include "opt_compat.h"
33#include "opt_kdtrace.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/imgact.h>
38#include <sys/ktr.h>
39#include <sys/lock.h>
40#include <sys/mutex.h>
41#include <sys/proc.h>
42#include <sys/racct.h>
43#include <sys/sched.h>
44#include <sys/syscallsubr.h>
45#include <sys/sx.h>
46#include <sys/unistd.h>
47#include <sys/wait.h>
48
49#include <vm/vm.h>
50#include <vm/pmap.h>
51#include <vm/vm_map.h>
52
53#ifdef COMPAT_LINUX32
54#include <machine/../linux32/linux.h>
55#include <machine/../linux32/linux32_proto.h>
56#else
57#include <machine/../linux/linux.h>
58#include <machine/../linux/linux_proto.h>
59#endif
60#include <compat/linux/linux_signal.h>
61#include <compat/linux/linux_emul.h>
62#include <compat/linux/linux_futex.h>
63#include <compat/linux/linux_misc.h>
64#include <compat/linux/linux_util.h>
65
66int
67linux_fork(struct thread *td, struct linux_fork_args *args)
68{
69	int error;
70	struct proc *p2;
71	struct thread *td2;
72
73#ifdef DEBUG
74	if (ldebug(fork))
75		printf(ARGS(fork, ""));
76#endif
77
78	if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2, NULL, 0))
79	    != 0)
80		return (error);
81
82	td2 = FIRST_THREAD_IN_PROC(p2);
83
84	linux_proc_init(td, td2, 0);
85
86	td->td_retval[0] = p2->p_pid;
87
88	/*
89	 * Make this runnable after we are finished with it.
90	 */
91	thread_lock(td2);
92	TD_SET_CAN_RUN(td2);
93	sched_add(td2, SRQ_BORING);
94	thread_unlock(td2);
95
96	return (0);
97}
98
99int
100linux_vfork(struct thread *td, struct linux_vfork_args *args)
101{
102	int error;
103	struct proc *p2;
104	struct thread *td2;
105
106#ifdef DEBUG
107	if (ldebug(vfork))
108		printf(ARGS(vfork, ""));
109#endif
110
111	/* Exclude RFPPWAIT */
112	if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2,
113	    NULL, 0)) != 0)
114		return (error);
115
116
117	td2 = FIRST_THREAD_IN_PROC(p2);
118
119	linux_proc_init(td, td2, 0);
120
121	PROC_LOCK(p2);
122	p2->p_flag |= P_PPWAIT;
123	PROC_UNLOCK(p2);
124
125   	td->td_retval[0] = p2->p_pid;
126
127	/*
128	 * Make this runnable after we are finished with it.
129	 */
130	thread_lock(td2);
131	TD_SET_CAN_RUN(td2);
132	sched_add(td2, SRQ_BORING);
133	thread_unlock(td2);
134
135	/* wait for the children to exit, ie. emulate vfork */
136	PROC_LOCK(p2);
137	while (p2->p_flag & P_PPWAIT)
138		cv_wait(&p2->p_pwait, &p2->p_mtx);
139	PROC_UNLOCK(p2);
140
141	return (0);
142}
143
144static int
145linux_clone_proc(struct thread *td, struct linux_clone_args *args)
146{
147	int error, ff = RFPROC | RFSTOPPED;
148	struct proc *p2;
149	struct thread *td2;
150	int exit_signal;
151	struct linux_emuldata *em;
152
153#ifdef DEBUG
154	if (ldebug(clone)) {
155		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
156		    "child tid: %p"), (unsigned)args->flags,
157		    args->stack, args->parent_tidptr, args->child_tidptr);
158	}
159#endif
160
161	exit_signal = args->flags & 0x000000ff;
162	if (LINUX_SIG_VALID(exit_signal)) {
163		if (exit_signal <= LINUX_SIGTBLSZ)
164			exit_signal =
165			    linux_to_bsd_signal[_SIG_IDX(exit_signal)];
166	} else if (exit_signal != 0)
167		return (EINVAL);
168
169	if (args->flags & LINUX_CLONE_VM)
170		ff |= RFMEM;
171	if (args->flags & LINUX_CLONE_SIGHAND)
172		ff |= RFSIGSHARE;
173	/*
174	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
175	 * and open files is independant.  In FreeBSD, its in one
176	 * structure but in reality it does not cause any problems
177	 * because both of these flags are usually set together.
178	 */
179	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
180		ff |= RFFDG;
181
182	if (args->flags & LINUX_CLONE_PARENT_SETTID)
183		if (args->parent_tidptr == NULL)
184			return (EINVAL);
185
186	error = fork1(td, ff, 0, &p2, NULL, 0);
187	if (error)
188		return (error);
189
190	td2 = FIRST_THREAD_IN_PROC(p2);
191
192	/* create the emuldata */
193	linux_proc_init(td, td2, args->flags);
194
195	em = em_find(td2);
196	KASSERT(em != NULL, ("clone_proc: emuldata not found.\n"));
197
198	if (args->flags & LINUX_CLONE_CHILD_SETTID)
199		em->child_set_tid = args->child_tidptr;
200	else
201	   	em->child_set_tid = NULL;
202
203	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
204		em->child_clear_tid = args->child_tidptr;
205	else
206	   	em->child_clear_tid = NULL;
207
208	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
209		error = copyout(&p2->p_pid, args->parent_tidptr,
210		    sizeof(p2->p_pid));
211		if (error)
212			printf(LMSG("copyout failed!"));
213	}
214
215	PROC_LOCK(p2);
216	p2->p_sigparent = exit_signal;
217	PROC_UNLOCK(p2);
218	/*
219	 * In a case of stack = NULL, we are supposed to COW calling process
220	 * stack. This is what normal fork() does, so we just keep tf_rsp arg
221	 * intact.
222	 */
223	linux_set_upcall_kse(td2, PTROUT(args->stack));
224
225	if (args->flags & LINUX_CLONE_SETTLS)
226		linux_set_cloned_tls(td2, args->tls);
227
228#ifdef DEBUG
229	if (ldebug(clone))
230		printf(LMSG("clone: successful rfork to %d, "
231		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
232		    exit_signal);
233#endif
234
235	if (args->flags & LINUX_CLONE_VFORK) {
236	   	PROC_LOCK(p2);
237	   	p2->p_flag |= P_PPWAIT;
238	   	PROC_UNLOCK(p2);
239	}
240
241	/*
242	 * Make this runnable after we are finished with it.
243	 */
244	thread_lock(td2);
245	TD_SET_CAN_RUN(td2);
246	sched_add(td2, SRQ_BORING);
247	thread_unlock(td2);
248
249	td->td_retval[0] = p2->p_pid;
250
251	if (args->flags & LINUX_CLONE_VFORK) {
252		/* wait for the children to exit, ie. emulate vfork */
253		PROC_LOCK(p2);
254		while (p2->p_flag & P_PPWAIT)
255			cv_wait(&p2->p_pwait, &p2->p_mtx);
256		PROC_UNLOCK(p2);
257	}
258
259	return (0);
260}
261
262static int
263linux_clone_thread(struct thread *td, struct linux_clone_args *args)
264{
265	struct linux_emuldata *em;
266	struct thread *newtd;
267	struct proc *p;
268	int error;
269
270#ifdef DEBUG
271	if (ldebug(clone)) {
272		printf(ARGS(clone, "thread: flags %x, stack %p, parent tid: %p, "
273		    "child tid: %p"), (unsigned)args->flags,
274		    args->stack, args->parent_tidptr, args->child_tidptr);
275	}
276#endif
277
278	LINUX_CTR4(clone, "thread(%d) flags %x ptid %p ctid %p",
279	    td->td_tid, (unsigned)args->flags,
280	    args->parent_tidptr, args->child_tidptr);
281
282	if (args->flags & LINUX_CLONE_PARENT_SETTID)
283		if (args->parent_tidptr == NULL)
284			return (EINVAL);
285
286	/* Threads should be created with own stack */
287	if (args->stack == NULL)
288		return (EINVAL);
289
290	p = td->td_proc;
291
292	/* Initialize our td */
293	error = kern_thr_alloc(p, 0, &newtd);
294	if (error)
295		return (error);
296
297	cpu_set_upcall(newtd, td);
298
299	bzero(&newtd->td_startzero,
300	    __rangeof(struct thread, td_startzero, td_endzero));
301	bcopy(&td->td_startcopy, &newtd->td_startcopy,
302	    __rangeof(struct thread, td_startcopy, td_endcopy));
303
304	newtd->td_proc = p;
305	newtd->td_ucred = crhold(td->td_ucred);
306
307	/* create the emuldata */
308	linux_proc_init(td, newtd, args->flags);
309
310	em = em_find(newtd);
311	KASSERT(em != NULL, ("clone_thread: emuldata not found.\n"));
312
313	if (args->flags & LINUX_CLONE_SETTLS)
314		linux_set_cloned_tls(newtd, args->tls);
315
316	if (args->flags & LINUX_CLONE_CHILD_SETTID)
317		em->child_set_tid = args->child_tidptr;
318	else
319	   	em->child_set_tid = NULL;
320
321	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
322		em->child_clear_tid = args->child_tidptr;
323	else
324	   	em->child_clear_tid = NULL;
325
326	cpu_thread_clean(newtd);
327
328	linux_set_upcall_kse(newtd, PTROUT(args->stack));
329
330	PROC_LOCK(p);
331	p->p_flag |= P_HADTHREADS;
332	newtd->td_sigmask = td->td_sigmask;
333	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
334
335	if (args->flags & LINUX_CLONE_PARENT)
336		thread_link(newtd, p->p_pptr);
337	else
338		thread_link(newtd, p);
339
340	thread_lock(td);
341	/* let the scheduler know about these things. */
342	sched_fork_thread(td, newtd);
343	thread_unlock(td);
344	if (P_SHOULDSTOP(p))
345		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
346	PROC_UNLOCK(p);
347
348	tidhash_add(newtd);
349
350#ifdef DEBUG
351	if (ldebug(clone))
352		printf(ARGS(clone, "successful clone to %d, stack %p"),
353		(int)newtd->td_tid, args->stack);
354#endif
355
356	LINUX_CTR2(clone, "thread(%d) successful clone to %d",
357	    td->td_tid, newtd->td_tid);
358
359	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
360		error = copyout(&newtd->td_tid, args->parent_tidptr,
361		    sizeof(newtd->td_tid));
362		if (error)
363			printf(LMSG("clone_thread: copyout failed!"));
364	}
365
366	/*
367	 * Make this runnable after we are finished with it.
368	 */
369	thread_lock(newtd);
370	TD_SET_CAN_RUN(newtd);
371	sched_add(newtd, SRQ_BORING);
372	thread_unlock(newtd);
373
374	td->td_retval[0] = newtd->td_tid;
375
376	return (0);
377}
378
379int
380linux_clone(struct thread *td, struct linux_clone_args *args)
381{
382
383	if (args->flags & LINUX_CLONE_THREAD)
384		return (linux_clone_thread(td, args));
385	else
386		return (linux_clone_proc(td, args));
387}
388
389int
390linux_exit(struct thread *td, struct linux_exit_args *args)
391{
392	struct linux_emuldata *em;
393
394	em = em_find(td);
395	KASSERT(em != NULL, ("exit: emuldata not found.\n"));
396
397	LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval);
398
399	linux_thread_detach(td);
400
401	/*
402	 * XXX. When the last two threads of a process
403	 * exit via pthread_exit() try thr_exit() first.
404	 */
405	kern_thr_exit(td);
406	exit1(td, W_EXITCODE(args->rval, 0));
407		/* NOTREACHED */
408}
409
410int
411linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
412{
413	struct linux_emuldata *em;
414
415	em = em_find(td);
416	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
417
418	em->child_clear_tid = args->tidptr;
419
420	td->td_retval[0] = em->em_tid;
421
422	LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d",
423	    em->em_tid, args->tidptr, td->td_retval[0]);
424
425	return (0);
426}
427
428void
429linux_thread_detach(struct thread *td)
430{
431	struct linux_sys_futex_args cup;
432	struct linux_emuldata *em;
433	int *child_clear_tid;
434	int error;
435
436	em = em_find(td);
437	KASSERT(em != NULL, ("thread_detach: emuldata not found.\n"));
438
439	LINUX_CTR1(exit, "thread detach(%d)", em->em_tid);
440
441	release_futexes(td, em);
442
443	child_clear_tid = em->child_clear_tid;
444
445	if (child_clear_tid != NULL) {
446
447		LINUX_CTR2(exit, "thread detach(%d) %p",
448		    em->em_tid, child_clear_tid);
449
450		error = suword32(child_clear_tid, 0);
451		if (error != 0)
452			return;
453
454		cup.uaddr = child_clear_tid;
455		cup.op = LINUX_FUTEX_WAKE;
456		cup.val = 1;		/* wake one */
457		cup.timeout = NULL;
458		cup.uaddr2 = NULL;
459		cup.val3 = 0;
460		error = linux_sys_futex(td, &cup);
461		/*
462		 * this cannot happen at the moment and if this happens it
463		 * probably means there is a user space bug
464		 */
465		if (error != 0)
466			linux_msg(td, "futex stuff in thread_detach failed.");
467	}
468}
469