linux_fork.c revision 293554
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_fork.c 293554 2016-01-09 16:58:03Z dchagin $");
31
32#include "opt_compat.h"
33#include "opt_kdtrace.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/imgact.h>
38#include <sys/ktr.h>
39#include <sys/lock.h>
40#include <sys/mutex.h>
41#include <sys/proc.h>
42#include <sys/racct.h>
43#include <sys/sched.h>
44#include <sys/syscallsubr.h>
45#include <sys/sx.h>
46#include <sys/unistd.h>
47#include <sys/wait.h>
48
49#include <vm/vm.h>
50#include <vm/pmap.h>
51#include <vm/vm_map.h>
52
53#ifdef COMPAT_LINUX32
54#include <machine/../linux32/linux.h>
55#include <machine/../linux32/linux32_proto.h>
56#else
57#include <machine/../linux/linux.h>
58#include <machine/../linux/linux_proto.h>
59#endif
60#include <compat/linux/linux_signal.h>
61#include <compat/linux/linux_emul.h>
62#include <compat/linux/linux_futex.h>
63#include <compat/linux/linux_misc.h>
64#include <compat/linux/linux_util.h>
65
66int
67linux_fork(struct thread *td, struct linux_fork_args *args)
68{
69	int error;
70	struct proc *p2;
71	struct thread *td2;
72
73#ifdef DEBUG
74	if (ldebug(fork))
75		printf(ARGS(fork, ""));
76#endif
77
78	if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2, NULL, 0))
79	    != 0)
80		return (error);
81
82	td2 = FIRST_THREAD_IN_PROC(p2);
83
84	linux_proc_init(td, td2, 0);
85
86	td->td_retval[0] = p2->p_pid;
87
88	/*
89	 * Make this runnable after we are finished with it.
90	 */
91	thread_lock(td2);
92	TD_SET_CAN_RUN(td2);
93	sched_add(td2, SRQ_BORING);
94	thread_unlock(td2);
95
96	return (0);
97}
98
99int
100linux_vfork(struct thread *td, struct linux_vfork_args *args)
101{
102	int error;
103	struct proc *p2;
104	struct thread *td2;
105
106#ifdef DEBUG
107	if (ldebug(vfork))
108		printf(ARGS(vfork, ""));
109#endif
110
111	/* Exclude RFPPWAIT */
112	if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2,
113	    NULL, 0)) != 0)
114		return (error);
115
116
117	td2 = FIRST_THREAD_IN_PROC(p2);
118
119	linux_proc_init(td, td2, 0);
120
121	PROC_LOCK(p2);
122	p2->p_flag |= P_PPWAIT;
123	PROC_UNLOCK(p2);
124
125   	td->td_retval[0] = p2->p_pid;
126
127	/*
128	 * Make this runnable after we are finished with it.
129	 */
130	thread_lock(td2);
131	TD_SET_CAN_RUN(td2);
132	sched_add(td2, SRQ_BORING);
133	thread_unlock(td2);
134
135	/* wait for the children to exit, ie. emulate vfork */
136	PROC_LOCK(p2);
137	while (p2->p_flag & P_PPWAIT)
138		cv_wait(&p2->p_pwait, &p2->p_mtx);
139	PROC_UNLOCK(p2);
140
141	return (0);
142}
143
144static int
145linux_clone_proc(struct thread *td, struct linux_clone_args *args)
146{
147	int error, ff = RFPROC | RFSTOPPED;
148	struct proc *p2;
149	struct thread *td2;
150	int exit_signal;
151	struct linux_emuldata *em;
152
153#ifdef DEBUG
154	if (ldebug(clone)) {
155		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
156		    "child tid: %p"), (unsigned)args->flags,
157		    args->stack, args->parent_tidptr, args->child_tidptr);
158	}
159#endif
160
161	exit_signal = args->flags & 0x000000ff;
162	if (LINUX_SIG_VALID(exit_signal)) {
163		if (exit_signal <= LINUX_SIGTBLSZ)
164			exit_signal =
165			    linux_to_bsd_signal[_SIG_IDX(exit_signal)];
166	} else if (exit_signal != 0)
167		return (EINVAL);
168
169	if (args->flags & LINUX_CLONE_VM)
170		ff |= RFMEM;
171	if (args->flags & LINUX_CLONE_SIGHAND)
172		ff |= RFSIGSHARE;
173	/*
174	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
175	 * and open files is independant.  In FreeBSD, its in one
176	 * structure but in reality it does not cause any problems
177	 * because both of these flags are usually set together.
178	 */
179	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
180		ff |= RFFDG;
181
182	if (args->flags & LINUX_CLONE_PARENT_SETTID)
183		if (args->parent_tidptr == NULL)
184			return (EINVAL);
185
186	error = fork1(td, ff, 0, &p2, NULL, 0);
187	if (error)
188		return (error);
189
190	td2 = FIRST_THREAD_IN_PROC(p2);
191
192	/* create the emuldata */
193	linux_proc_init(td, td2, args->flags);
194
195	em = em_find(td2);
196	KASSERT(em != NULL, ("clone_proc: emuldata not found.\n"));
197
198	if (args->flags & LINUX_CLONE_CHILD_SETTID)
199		em->child_set_tid = args->child_tidptr;
200	else
201	   	em->child_set_tid = NULL;
202
203	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
204		em->child_clear_tid = args->child_tidptr;
205	else
206	   	em->child_clear_tid = NULL;
207
208	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
209		error = copyout(&p2->p_pid, args->parent_tidptr,
210		    sizeof(p2->p_pid));
211		if (error)
212			printf(LMSG("copyout failed!"));
213	}
214
215	PROC_LOCK(p2);
216	p2->p_sigparent = exit_signal;
217	PROC_UNLOCK(p2);
218	/*
219	 * In a case of stack = NULL, we are supposed to COW calling process
220	 * stack. This is what normal fork() does, so we just keep tf_rsp arg
221	 * intact.
222	 */
223	linux_set_upcall_kse(td2, PTROUT(args->stack));
224
225	if (args->flags & LINUX_CLONE_SETTLS)
226		linux_set_cloned_tls(td2, args->tls);
227
228#ifdef DEBUG
229	if (ldebug(clone))
230		printf(LMSG("clone: successful rfork to %d, "
231		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
232		    exit_signal);
233#endif
234
235	if (args->flags & LINUX_CLONE_VFORK) {
236	   	PROC_LOCK(p2);
237	   	p2->p_flag |= P_PPWAIT;
238	   	PROC_UNLOCK(p2);
239	}
240
241	/*
242	 * Make this runnable after we are finished with it.
243	 */
244	thread_lock(td2);
245	TD_SET_CAN_RUN(td2);
246	sched_add(td2, SRQ_BORING);
247	thread_unlock(td2);
248
249	td->td_retval[0] = p2->p_pid;
250
251	if (args->flags & LINUX_CLONE_VFORK) {
252		/* wait for the children to exit, ie. emulate vfork */
253		PROC_LOCK(p2);
254		while (p2->p_flag & P_PPWAIT)
255			cv_wait(&p2->p_pwait, &p2->p_mtx);
256		PROC_UNLOCK(p2);
257	}
258
259	return (0);
260}
261
262static int
263linux_clone_thread(struct thread *td, struct linux_clone_args *args)
264{
265	struct linux_emuldata *em;
266	struct thread *newtd;
267	struct proc *p;
268	int error;
269
270#ifdef DEBUG
271	if (ldebug(clone)) {
272		printf(ARGS(clone, "thread: flags %x, stack %p, parent tid: %p, "
273		    "child tid: %p"), (unsigned)args->flags,
274		    args->stack, args->parent_tidptr, args->child_tidptr);
275	}
276#endif
277
278	LINUX_CTR4(clone, "thread(%d) flags %x ptid %p ctid %p",
279	    td->td_tid, (unsigned)args->flags,
280	    args->parent_tidptr, args->child_tidptr);
281
282	if (args->flags & LINUX_CLONE_PARENT_SETTID)
283		if (args->parent_tidptr == NULL)
284			return (EINVAL);
285
286	/* Threads should be created with own stack */
287	if (args->stack == NULL)
288		return (EINVAL);
289
290	p = td->td_proc;
291
292	/* Initialize our td */
293	error = kern_thr_alloc(p, 0, &newtd);
294	if (error)
295		return (error);
296
297	cpu_set_upcall(newtd, td);
298
299	bzero(&newtd->td_startzero,
300	    __rangeof(struct thread, td_startzero, td_endzero));
301	bcopy(&td->td_startcopy, &newtd->td_startcopy,
302	    __rangeof(struct thread, td_startcopy, td_endcopy));
303
304	newtd->td_proc = p;
305	newtd->td_ucred = crhold(td->td_ucred);
306
307	/* create the emuldata */
308	linux_proc_init(td, newtd, args->flags);
309
310	em = em_find(newtd);
311	KASSERT(em != NULL, ("clone_thread: emuldata not found.\n"));
312
313	if (args->flags & LINUX_CLONE_SETTLS)
314		linux_set_cloned_tls(newtd, args->tls);
315
316	if (args->flags & LINUX_CLONE_CHILD_SETTID)
317		em->child_set_tid = args->child_tidptr;
318	else
319	   	em->child_set_tid = NULL;
320
321	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
322		em->child_clear_tid = args->child_tidptr;
323	else
324	   	em->child_clear_tid = NULL;
325
326	cpu_thread_clean(newtd);
327
328	linux_set_upcall_kse(newtd, PTROUT(args->stack));
329
330	PROC_LOCK(p);
331	p->p_flag |= P_HADTHREADS;
332	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
333
334	if (args->flags & LINUX_CLONE_PARENT)
335		thread_link(newtd, p->p_pptr);
336	else
337		thread_link(newtd, p);
338
339	thread_lock(td);
340	/* let the scheduler know about these things. */
341	sched_fork_thread(td, newtd);
342	thread_unlock(td);
343	if (P_SHOULDSTOP(p))
344		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
345	PROC_UNLOCK(p);
346
347	tidhash_add(newtd);
348
349#ifdef DEBUG
350	if (ldebug(clone))
351		printf(ARGS(clone, "successful clone to %d, stack %p"),
352		(int)newtd->td_tid, args->stack);
353#endif
354
355	LINUX_CTR2(clone, "thread(%d) successful clone to %d",
356	    td->td_tid, newtd->td_tid);
357
358	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
359		error = copyout(&newtd->td_tid, args->parent_tidptr,
360		    sizeof(newtd->td_tid));
361		if (error)
362			printf(LMSG("clone_thread: copyout failed!"));
363	}
364
365	/*
366	 * Make this runnable after we are finished with it.
367	 */
368	thread_lock(newtd);
369	TD_SET_CAN_RUN(newtd);
370	sched_add(newtd, SRQ_BORING);
371	thread_unlock(newtd);
372
373	td->td_retval[0] = newtd->td_tid;
374
375	return (0);
376}
377
378int
379linux_clone(struct thread *td, struct linux_clone_args *args)
380{
381
382	if (args->flags & LINUX_CLONE_THREAD)
383		return (linux_clone_thread(td, args));
384	else
385		return (linux_clone_proc(td, args));
386}
387
388int
389linux_exit(struct thread *td, struct linux_exit_args *args)
390{
391	struct linux_emuldata *em;
392
393	em = em_find(td);
394	KASSERT(em != NULL, ("exit: emuldata not found.\n"));
395
396	LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval);
397
398	linux_thread_detach(td);
399
400	/*
401	 * XXX. When the last two threads of a process
402	 * exit via pthread_exit() try thr_exit() first.
403	 */
404	kern_thr_exit(td);
405	exit1(td, W_EXITCODE(args->rval, 0));
406		/* NOTREACHED */
407}
408
409int
410linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
411{
412	struct linux_emuldata *em;
413
414	em = em_find(td);
415	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
416
417	em->child_clear_tid = args->tidptr;
418
419	td->td_retval[0] = em->em_tid;
420
421	LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d",
422	    em->em_tid, args->tidptr, td->td_retval[0]);
423
424	return (0);
425}
426
427void
428linux_thread_detach(struct thread *td)
429{
430	struct linux_sys_futex_args cup;
431	struct linux_emuldata *em;
432	int *child_clear_tid;
433	int error;
434
435	em = em_find(td);
436	KASSERT(em != NULL, ("thread_detach: emuldata not found.\n"));
437
438	LINUX_CTR1(exit, "thread detach(%d)", em->em_tid);
439
440	release_futexes(td, em);
441
442	child_clear_tid = em->child_clear_tid;
443
444	if (child_clear_tid != NULL) {
445
446		LINUX_CTR2(exit, "thread detach(%d) %p",
447		    em->em_tid, child_clear_tid);
448
449		error = suword32(child_clear_tid, 0);
450		if (error != 0)
451			return;
452
453		cup.uaddr = child_clear_tid;
454		cup.op = LINUX_FUTEX_WAKE;
455		cup.val = 1;		/* wake one */
456		cup.timeout = NULL;
457		cup.uaddr2 = NULL;
458		cup.val3 = 0;
459		error = linux_sys_futex(td, &cup);
460		/*
461		 * this cannot happen at the moment and if this happens it
462		 * probably means there is a user space bug
463		 */
464		if (error != 0)
465			linux_msg(td, "futex stuff in thread_detach failed.");
466	}
467}
468