linux_misc.c revision 293516
1/*-
2 * Copyright (c) 2002 Doug Rabson
3 * Copyright (c) 1994-1995 Søren Schmidt
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_misc.c 293516 2016-01-09 15:48:11Z dchagin $");
32
33#include "opt_compat.h"
34#include "opt_kdtrace.h"
35
36#include <sys/param.h>
37#include <sys/blist.h>
38#include <sys/fcntl.h>
39#if defined(__i386__)
40#include <sys/imgact_aout.h>
41#endif
42#include <sys/jail.h>
43#include <sys/kernel.h>
44#include <sys/limits.h>
45#include <sys/lock.h>
46#include <sys/malloc.h>
47#include <sys/mman.h>
48#include <sys/mount.h>
49#include <sys/mutex.h>
50#include <sys/namei.h>
51#include <sys/priv.h>
52#include <sys/proc.h>
53#include <sys/reboot.h>
54#include <sys/racct.h>
55#include <sys/resourcevar.h>
56#include <sys/sched.h>
57#include <sys/signalvar.h>
58#include <sys/stat.h>
59#include <sys/syscallsubr.h>
60#include <sys/sysctl.h>
61#include <sys/sysproto.h>
62#include <sys/systm.h>
63#include <sys/time.h>
64#include <sys/vmmeter.h>
65#include <sys/vnode.h>
66#include <sys/wait.h>
67#include <sys/cpuset.h>
68
69#include <security/mac/mac_framework.h>
70
71#include <vm/vm.h>
72#include <vm/pmap.h>
73#include <vm/vm_kern.h>
74#include <vm/vm_map.h>
75#include <vm/vm_extern.h>
76#include <vm/vm_object.h>
77#include <vm/swap_pager.h>
78
79#ifdef COMPAT_LINUX32
80#include <machine/../linux32/linux.h>
81#include <machine/../linux32/linux32_proto.h>
82#else
83#include <machine/../linux/linux.h>
84#include <machine/../linux/linux_proto.h>
85#endif
86
87#include <compat/linux/linux_file.h>
88#include <compat/linux/linux_mib.h>
89#include <compat/linux/linux_signal.h>
90#include <compat/linux/linux_util.h>
91#include <compat/linux/linux_sysproto.h>
92#include <compat/linux/linux_emul.h>
93#include <compat/linux/linux_misc.h>
94
int stclohz;				/* Statistics clock frequency */

/*
 * Table of FreeBSD RLIMIT_* identifiers with LINUX_RLIM_NLIMITS slots.
 * NOTE(review): presumably indexed by the Linux resource-limit number;
 * the code that consumes it is not in this part of the file -- confirm.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};
102
/*
 * Record filled in by linux_sysinfo() and copied out to the caller's
 * buffer as a whole (sizeof(struct l_sysinfo) bytes).  'pads' and '_f'
 * carry no information.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;		/* explicit padding, never assigned */
	l_ulong		totalbig;	/* set to 0 by linux_sysinfo() */
	l_ulong		freebig;	/* set to 0 by linux_sysinfo() */
	l_uint		mem_unit;	/* set to 1 by linux_sysinfo() */
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};
120
/*
 * A user pointer and a length, as a pair.
 * NOTE(review): presumably the packed signal-mask argument of Linux
 * pselect6(); its consumer is not in this part of the file -- confirm.
 */
struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};
125
126int
127linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
128{
129	struct l_sysinfo sysinfo;
130	vm_object_t object;
131	int i, j;
132	struct timespec ts;
133
134	getnanouptime(&ts);
135	if (ts.tv_nsec != 0)
136		ts.tv_sec++;
137	sysinfo.uptime = ts.tv_sec;
138
139	/* Use the information from the mib to get our load averages */
140	for (i = 0; i < 3; i++)
141		sysinfo.loads[i] = averunnable.ldavg[i] *
142		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
143
144	sysinfo.totalram = physmem * PAGE_SIZE;
145	sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
146
147	sysinfo.sharedram = 0;
148	mtx_lock(&vm_object_list_mtx);
149	TAILQ_FOREACH(object, &vm_object_list, object_list)
150		if (object->shadow_count > 1)
151			sysinfo.sharedram += object->resident_page_count;
152	mtx_unlock(&vm_object_list_mtx);
153
154	sysinfo.sharedram *= PAGE_SIZE;
155	sysinfo.bufferram = 0;
156
157	swap_pager_status(&i, &j);
158	sysinfo.totalswap = i * PAGE_SIZE;
159	sysinfo.freeswap = (i - j) * PAGE_SIZE;
160
161	sysinfo.procs = nprocs;
162
163	/* The following are only present in newer Linux kernels. */
164	sysinfo.totalbig = 0;
165	sysinfo.freebig = 0;
166	sysinfo.mem_unit = 1;
167
168	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
169}
170
171int
172linux_alarm(struct thread *td, struct linux_alarm_args *args)
173{
174	struct itimerval it, old_it;
175	u_int secs;
176	int error;
177
178#ifdef DEBUG
179	if (ldebug(alarm))
180		printf(ARGS(alarm, "%u"), args->secs);
181#endif
182
183	secs = args->secs;
184
185	if (secs > INT_MAX)
186		secs = INT_MAX;
187
188	it.it_value.tv_sec = (long) secs;
189	it.it_value.tv_usec = 0;
190	it.it_interval.tv_sec = 0;
191	it.it_interval.tv_usec = 0;
192	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
193	if (error)
194		return (error);
195	if (timevalisset(&old_it.it_value)) {
196		if (old_it.it_value.tv_usec != 0)
197			old_it.it_value.tv_sec++;
198		td->td_retval[0] = old_it.it_value.tv_sec;
199	}
200	return (0);
201}
202
203int
204linux_brk(struct thread *td, struct linux_brk_args *args)
205{
206	struct vmspace *vm = td->td_proc->p_vmspace;
207	vm_offset_t new, old;
208	struct obreak_args /* {
209		char * nsize;
210	} */ tmp;
211
212#ifdef DEBUG
213	if (ldebug(brk))
214		printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend);
215#endif
216	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
217	new = (vm_offset_t)args->dsend;
218	tmp.nsize = (char *)new;
219	if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp))
220		td->td_retval[0] = (long)new;
221	else
222		td->td_retval[0] = (long)old;
223
224	return (0);
225}
226
227#if defined(__i386__)
228/* XXX: what about amd64/linux32? */
229
/*
 * Linux uselib(2): map an a.out shared library into the calling process.
 *
 * The library path is looked up, the vnode is checked (no writers,
 * regular executable file, non-empty, accessible), its first page is
 * mapped into exec_map to read the a.out header, and then text+data are
 * placed at trunc_page(a_entry), followed by anonymous memory for the
 * bss.  Returns 0 or an errno value; every exit goes through 'cleanup'.
 *
 * NOTE(review): no VOP_CLOSE() and no vnode release is visible on any
 * path below -- confirm whether the open count and the reference
 * obtained through namei() are intentionally retained.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error, locked, writecount;

	LCONVPATHEXIST(td, args->library, &library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), library);
#endif

	/* State consulted by the cleanup code; must be set before any goto. */
	a_out = NULL;
	locked = 0;
	vp = NULL;

	NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = 1;

	/* Writable? */
	error = VOP_GET_WRITECOUNT(vp, &writecount);
	if (error != 0)
		goto cleanup;
	if (writecount != 0) {
		error = ETXTBSY;
		goto cleanup;
	}

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/*
		 * exec(2) would report EACCES for this condition; this
		 * path reports ENOEXEC instead.
		 */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:			/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:			/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * text + data can't exceed file size
	 * NOTE(review): the sum is formed from two header fields read from
	 * the file with no wrap-around check -- confirm this is acceptable.
	 */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 * XXX: Note that if any of the VM operations fail below we don't
	 * clear this flag.
	 */
	VOP_SET_TEXT(vp);

	/*
	 * Lock no longer needed
	 */
	locked = 0;
	VOP_UNLOCK(vp, 0);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
#ifdef DEBUG
		printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/* Copy text+data from the file into the new user mapping. */
		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		/* A short read means the file is smaller than its header says. */
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
#ifdef DEBUG
		printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
	}
#ifdef DEBUG
	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0],
	    ((long *)vmaddr)[1]);
#endif
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unlock vnode if needed */
	if (locked)
		VOP_UNLOCK(vp, 0);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}
468
469#endif	/* __i386__ */
470
471int
472linux_select(struct thread *td, struct linux_select_args *args)
473{
474	l_timeval ltv;
475	struct timeval tv0, tv1, utv, *tvp;
476	int error;
477
478#ifdef DEBUG
479	if (ldebug(select))
480		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
481		    (void *)args->readfds, (void *)args->writefds,
482		    (void *)args->exceptfds, (void *)args->timeout);
483#endif
484
485	/*
486	 * Store current time for computation of the amount of
487	 * time left.
488	 */
489	if (args->timeout) {
490		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
491			goto select_out;
492		utv.tv_sec = ltv.tv_sec;
493		utv.tv_usec = ltv.tv_usec;
494#ifdef DEBUG
495		if (ldebug(select))
496			printf(LMSG("incoming timeout (%jd/%ld)"),
497			    (intmax_t)utv.tv_sec, utv.tv_usec);
498#endif
499
500		if (itimerfix(&utv)) {
501			/*
502			 * The timeval was invalid.  Convert it to something
503			 * valid that will act as it does under Linux.
504			 */
505			utv.tv_sec += utv.tv_usec / 1000000;
506			utv.tv_usec %= 1000000;
507			if (utv.tv_usec < 0) {
508				utv.tv_sec -= 1;
509				utv.tv_usec += 1000000;
510			}
511			if (utv.tv_sec < 0)
512				timevalclear(&utv);
513		}
514		microtime(&tv0);
515		tvp = &utv;
516	} else
517		tvp = NULL;
518
519	error = kern_select(td, args->nfds, args->readfds, args->writefds,
520	    args->exceptfds, tvp, sizeof(l_int) * 8);
521
522#ifdef DEBUG
523	if (ldebug(select))
524		printf(LMSG("real select returns %d"), error);
525#endif
526	if (error)
527		goto select_out;
528
529	if (args->timeout) {
530		if (td->td_retval[0]) {
531			/*
532			 * Compute how much time was left of the timeout,
533			 * by subtracting the current time and the time
534			 * before we started the call, and subtracting
535			 * that result from the user-supplied value.
536			 */
537			microtime(&tv1);
538			timevalsub(&tv1, &tv0);
539			timevalsub(&utv, &tv1);
540			if (utv.tv_sec < 0)
541				timevalclear(&utv);
542		} else
543			timevalclear(&utv);
544#ifdef DEBUG
545		if (ldebug(select))
546			printf(LMSG("outgoing timeout (%jd/%ld)"),
547			    (intmax_t)utv.tv_sec, utv.tv_usec);
548#endif
549		ltv.tv_sec = utv.tv_sec;
550		ltv.tv_usec = utv.tv_usec;
551		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
552			goto select_out;
553	}
554
555select_out:
556#ifdef DEBUG
557	if (ldebug(select))
558		printf(LMSG("select_out -> %d"), error);
559#endif
560	return (error);
561}
562
563int
564linux_mremap(struct thread *td, struct linux_mremap_args *args)
565{
566	struct munmap_args /* {
567		void *addr;
568		size_t len;
569	} */ bsd_args;
570	int error = 0;
571
572#ifdef DEBUG
573	if (ldebug(mremap))
574		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
575		    (void *)(uintptr_t)args->addr,
576		    (unsigned long)args->old_len,
577		    (unsigned long)args->new_len,
578		    (unsigned long)args->flags);
579#endif
580
581	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
582		td->td_retval[0] = 0;
583		return (EINVAL);
584	}
585
586	/*
587	 * Check for the page alignment.
588	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
589	 */
590	if (args->addr & PAGE_MASK) {
591		td->td_retval[0] = 0;
592		return (EINVAL);
593	}
594
595	args->new_len = round_page(args->new_len);
596	args->old_len = round_page(args->old_len);
597
598	if (args->new_len > args->old_len) {
599		td->td_retval[0] = 0;
600		return (ENOMEM);
601	}
602
603	if (args->new_len < args->old_len) {
604		bsd_args.addr =
605		    (caddr_t)((uintptr_t)args->addr + args->new_len);
606		bsd_args.len = args->old_len - args->new_len;
607		error = sys_munmap(td, &bsd_args);
608	}
609
610	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
611	return (error);
612}
613
614#define LINUX_MS_ASYNC       0x0001
615#define LINUX_MS_INVALIDATE  0x0002
616#define LINUX_MS_SYNC        0x0004
617
618int
619linux_msync(struct thread *td, struct linux_msync_args *args)
620{
621	struct msync_args bsd_args;
622
623	bsd_args.addr = (caddr_t)(uintptr_t)args->addr;
624	bsd_args.len = (uintptr_t)args->len;
625	bsd_args.flags = args->fl & ~LINUX_MS_SYNC;
626
627	return (sys_msync(td, &bsd_args));
628}
629
630int
631linux_time(struct thread *td, struct linux_time_args *args)
632{
633	struct timeval tv;
634	l_time_t tm;
635	int error;
636
637#ifdef DEBUG
638	if (ldebug(time))
639		printf(ARGS(time, "*"));
640#endif
641
642	microtime(&tv);
643	tm = tv.tv_sec;
644	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
645		return (error);
646	td->td_retval[0] = tm;
647	return (0);
648}
649
/*
 * Record copied out by linux_times(); every field is a tick count
 * produced by CONVTCK().
 */
struct l_times_argv {
	l_clock_t	tms_utime;	/* from calcru() user time */
	l_clock_t	tms_stime;	/* from calcru() system time */
	l_clock_t	tms_cutime;	/* from calccru() user time */
	l_clock_t	tms_cstime;	/* from calccru() system time */
};
656
657
/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 *
 * CONVOTCK() converts a struct timeval to ticks at the fixed CLK_TCK
 * rate, CONVNTCK() at the stclohz rate.  CONVTCK() picks between them
 * by emulated kernel version and therefore needs a 'td' variable in
 * scope at the point of use.
 *
 * The argument is parenthesised so any lvalue expression (for example
 * "*ptr") may be passed; it is evaluated more than once, so it must be
 * free of side effects.
 */
#define	CLK_TCK		100

#define	CONVOTCK(r)	((r).tv_sec * CLK_TCK +				\
			    (r).tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	((r).tv_sec * stclohz +				\
			    (r).tv_usec / (1000000 / stclohz))

#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?		\
			    CONVNTCK(r) : CONVOTCK(r))
670
671int
672linux_times(struct thread *td, struct linux_times_args *args)
673{
674	struct timeval tv, utime, stime, cutime, cstime;
675	struct l_times_argv tms;
676	struct proc *p;
677	int error;
678
679#ifdef DEBUG
680	if (ldebug(times))
681		printf(ARGS(times, "*"));
682#endif
683
684	if (args->buf != NULL) {
685		p = td->td_proc;
686		PROC_LOCK(p);
687		PROC_STATLOCK(p);
688		calcru(p, &utime, &stime);
689		PROC_STATUNLOCK(p);
690		calccru(p, &cutime, &cstime);
691		PROC_UNLOCK(p);
692
693		tms.tms_utime = CONVTCK(utime);
694		tms.tms_stime = CONVTCK(stime);
695
696		tms.tms_cutime = CONVTCK(cutime);
697		tms.tms_cstime = CONVTCK(cstime);
698
699		if ((error = copyout(&tms, args->buf, sizeof(tms))))
700			return (error);
701	}
702
703	microuptime(&tv);
704	td->td_retval[0] = (int)CONVTCK(tv);
705	return (0);
706}
707
708int
709linux_newuname(struct thread *td, struct linux_newuname_args *args)
710{
711	struct l_new_utsname utsname;
712	char osname[LINUX_MAX_UTSNAME];
713	char osrelease[LINUX_MAX_UTSNAME];
714	char *p;
715
716#ifdef DEBUG
717	if (ldebug(newuname))
718		printf(ARGS(newuname, "*"));
719#endif
720
721	linux_get_osname(td, osname);
722	linux_get_osrelease(td, osrelease);
723
724	bzero(&utsname, sizeof(utsname));
725	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
726	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
727	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
728	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
729	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
730	for (p = utsname.version; *p != '\0'; ++p)
731		if (*p == '\n') {
732			*p = '\0';
733			break;
734		}
735	strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
736
737	return (copyout(&utsname, args->buf, sizeof(utsname)));
738}
739
740#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Access/modification time pair, in seconds, as read from userland by
 * linux_utime().
 */
struct l_utimbuf {
	l_time_t l_actime;	/* becomes tv[0].tv_sec in linux_utime() */
	l_time_t l_modtime;	/* becomes tv[1].tv_sec in linux_utime() */
};
745
746int
747linux_utime(struct thread *td, struct linux_utime_args *args)
748{
749	struct timeval tv[2], *tvp;
750	struct l_utimbuf lut;
751	char *fname;
752	int error;
753
754	LCONVPATHEXIST(td, args->fname, &fname);
755
756#ifdef DEBUG
757	if (ldebug(utime))
758		printf(ARGS(utime, "%s, *"), fname);
759#endif
760
761	if (args->times) {
762		if ((error = copyin(args->times, &lut, sizeof lut))) {
763			LFREEPATH(fname);
764			return (error);
765		}
766		tv[0].tv_sec = lut.l_actime;
767		tv[0].tv_usec = 0;
768		tv[1].tv_sec = lut.l_modtime;
769		tv[1].tv_usec = 0;
770		tvp = tv;
771	} else
772		tvp = NULL;
773
774	error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
775	LFREEPATH(fname);
776	return (error);
777}
778
779int
780linux_utimes(struct thread *td, struct linux_utimes_args *args)
781{
782	l_timeval ltv[2];
783	struct timeval tv[2], *tvp = NULL;
784	char *fname;
785	int error;
786
787	LCONVPATHEXIST(td, args->fname, &fname);
788
789#ifdef DEBUG
790	if (ldebug(utimes))
791		printf(ARGS(utimes, "%s, *"), fname);
792#endif
793
794	if (args->tptr != NULL) {
795		if ((error = copyin(args->tptr, ltv, sizeof ltv))) {
796			LFREEPATH(fname);
797			return (error);
798		}
799		tv[0].tv_sec = ltv[0].tv_sec;
800		tv[0].tv_usec = ltv[0].tv_usec;
801		tv[1].tv_sec = ltv[1].tv_sec;
802		tv[1].tv_usec = ltv[1].tv_usec;
803		tvp = tv;
804	}
805
806	error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
807	LFREEPATH(fname);
808	return (error);
809}
810
811int
812linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
813{
814	l_timeval ltv[2];
815	struct timeval tv[2], *tvp = NULL;
816	char *fname;
817	int error, dfd;
818
819	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
820	LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);
821
822#ifdef DEBUG
823	if (ldebug(futimesat))
824		printf(ARGS(futimesat, "%s, *"), fname);
825#endif
826
827	if (args->utimes != NULL) {
828		if ((error = copyin(args->utimes, ltv, sizeof ltv))) {
829			LFREEPATH(fname);
830			return (error);
831		}
832		tv[0].tv_sec = ltv[0].tv_sec;
833		tv[0].tv_usec = ltv[0].tv_usec;
834		tv[1].tv_sec = ltv[1].tv_sec;
835		tv[1].tv_usec = ltv[1].tv_usec;
836		tvp = tv;
837	}
838
839	error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
840	LFREEPATH(fname);
841	return (error);
842}
843#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
844
845int
846linux_common_wait(struct thread *td, int pid, int *status,
847    int options, struct rusage *ru)
848{
849	int error, tmpstat;
850
851	error = kern_wait(td, pid, &tmpstat, options, ru);
852	if (error)
853		return (error);
854
855	if (status) {
856		tmpstat &= 0xffff;
857		if (WIFSIGNALED(tmpstat))
858			tmpstat = (tmpstat & 0xffffff80) |
859			    BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
860		else if (WIFSTOPPED(tmpstat))
861			tmpstat = (tmpstat & 0xffff00ff) |
862			    (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
863		error = copyout(&tmpstat, status, sizeof(int));
864	}
865
866	return (error);
867}
868
869int
870linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
871{
872	int options;
873
874#ifdef DEBUG
875	if (ldebug(waitpid))
876		printf(ARGS(waitpid, "%d, %p, %d"),
877		    args->pid, (void *)args->status, args->options);
878#endif
879	/*
880	 * this is necessary because the test in kern_wait doesn't work
881	 * because we mess with the options here
882	 */
883	if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE))
884		return (EINVAL);
885
886	options = (args->options & (WNOHANG | WUNTRACED));
887	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
888	if (args->options & __WCLONE)
889		options |= WLINUXCLONE;
890
891	return (linux_common_wait(td, args->pid, args->status, options, NULL));
892}
893
894int
895linux_wait4(struct thread *td, struct linux_wait4_args *args)
896{
897	int error, options;
898	struct rusage ru, *rup;
899
900#ifdef DEBUG
901	if (ldebug(wait4))
902		printf(ARGS(wait4, "%d, %p, %d, %p"),
903		    args->pid, (void *)args->status, args->options,
904		    (void *)args->rusage);
905#endif
906
907	options = (args->options & (WNOHANG | WUNTRACED));
908	/* WLINUXCLONE should be equal to __WCLONE, but we make sure */
909	if (args->options & __WCLONE)
910		options |= WLINUXCLONE;
911
912	if (args->rusage != NULL)
913		rup = &ru;
914	else
915		rup = NULL;
916	error = linux_common_wait(td, args->pid, args->status, options, rup);
917	if (error != 0)
918		return (error);
919	if (args->rusage != NULL)
920		error = linux_copyout_rusage(&ru, args->rusage);
921	return (error);
922}
923
924int
925linux_waitid(struct thread *td, struct linux_waitid_args *args)
926{
927	int status, options, sig;
928	struct __wrusage wru;
929	siginfo_t siginfo;
930	l_siginfo_t lsi;
931	idtype_t idtype;
932	struct proc *p;
933	int error;
934
935	options = 0;
936	linux_to_bsd_waitopts(args->options, &options);
937
938	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
939		return (EINVAL);
940	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
941		return (EINVAL);
942
943	switch (args->idtype) {
944	case LINUX_P_ALL:
945		idtype = P_ALL;
946		break;
947	case LINUX_P_PID:
948		if (args->id <= 0)
949			return (EINVAL);
950		idtype = P_PID;
951		break;
952	case LINUX_P_PGID:
953		if (args->id <= 0)
954			return (EINVAL);
955		idtype = P_PGID;
956		break;
957	default:
958		return (EINVAL);
959	}
960
961	error = kern_wait6(td, idtype, args->id, &status, options,
962	    &wru, &siginfo);
963	if (error != 0)
964		return (error);
965	if (args->rusage != NULL) {
966		error = linux_copyout_rusage(&wru.wru_children,
967		    args->rusage);
968		if (error != 0)
969			return (error);
970	}
971	if (args->info != NULL) {
972		p = td->td_proc;
973		if (td->td_retval[0] == 0)
974			bzero(&lsi, sizeof(lsi));
975		else {
976			sig = BSD_TO_LINUX_SIGNAL(siginfo.si_signo);
977			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
978		}
979		error = copyout(&lsi, args->info, sizeof(lsi));
980	}
981	td->td_retval[0] = 0;
982
983	return (error);
984}
985
986int
987linux_mknod(struct thread *td, struct linux_mknod_args *args)
988{
989	char *path;
990	int error;
991
992	LCONVPATHCREAT(td, args->path, &path);
993
994#ifdef DEBUG
995	if (ldebug(mknod))
996		printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev);
997#endif
998
999	switch (args->mode & S_IFMT) {
1000	case S_IFIFO:
1001	case S_IFSOCK:
1002		error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode);
1003		break;
1004
1005	case S_IFCHR:
1006	case S_IFBLK:
1007		error = kern_mknod(td, path, UIO_SYSSPACE, args->mode,
1008		    args->dev);
1009		break;
1010
1011	case S_IFDIR:
1012		error = EPERM;
1013		break;
1014
1015	case 0:
1016		args->mode |= S_IFREG;
1017		/* FALLTHROUGH */
1018	case S_IFREG:
1019		error = kern_open(td, path, UIO_SYSSPACE,
1020		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
1021		if (error == 0)
1022			kern_close(td, td->td_retval[0]);
1023		break;
1024
1025	default:
1026		error = EINVAL;
1027		break;
1028	}
1029	LFREEPATH(path);
1030	return (error);
1031}
1032
1033int
1034linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
1035{
1036	char *path;
1037	int error, dfd;
1038
1039	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
1040	LCONVPATHCREAT_AT(td, args->filename, &path, dfd);
1041
1042#ifdef DEBUG
1043	if (ldebug(mknodat))
1044		printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev);
1045#endif
1046
1047	switch (args->mode & S_IFMT) {
1048	case S_IFIFO:
1049	case S_IFSOCK:
1050		error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode);
1051		break;
1052
1053	case S_IFCHR:
1054	case S_IFBLK:
1055		error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode,
1056		    args->dev);
1057		break;
1058
1059	case S_IFDIR:
1060		error = EPERM;
1061		break;
1062
1063	case 0:
1064		args->mode |= S_IFREG;
1065		/* FALLTHROUGH */
1066	case S_IFREG:
1067		error = kern_openat(td, dfd, path, UIO_SYSSPACE,
1068		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
1069		if (error == 0)
1070			kern_close(td, td->td_retval[0]);
1071		break;
1072
1073	default:
1074		error = EINVAL;
1075		break;
1076	}
1077	LFREEPATH(path);
1078	return (error);
1079}
1080
1081/*
1082 * UGH! This is just about the dumbest idea I've ever heard!!
1083 */
1084int
1085linux_personality(struct thread *td, struct linux_personality_args *args)
1086{
1087#ifdef DEBUG
1088	if (ldebug(personality))
1089		printf(ARGS(personality, "%lu"), (unsigned long)args->per);
1090#endif
1091	if (args->per != 0)
1092		return (EINVAL);
1093
1094	/* Yes Jim, it's still a Linux... */
1095	td->td_retval[0] = 0;
1096	return (0);
1097}
1098
/*
 * Interval-timer record built from two l_timeval members; it is what
 * linux_setitimer() and linux_getitimer() copy in from and out to
 * userland.
 */
struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};
1103
/*
 * Copy the four itimerval fields from *(lip) into *(bip), member by
 * member, so the two sides may use differently sized timeval types.
 * Despite the name, the callers in this file use it in both directions
 * (native -> Linux and Linux -> native); the first argument is always
 * the destination.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement
 * and stays correct under an unbraced if/else.  Each argument is
 * evaluated several times and must be free of side effects.
 */
#define	B2L_ITIMERVAL(bip, lip) do {					\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;		\
} while (0)
1109
1110int
1111linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
1112{
1113	int error;
1114	struct l_itimerval ls;
1115	struct itimerval aitv, oitv;
1116
1117#ifdef DEBUG
1118	if (ldebug(setitimer))
1119		printf(ARGS(setitimer, "%p, %p"),
1120		    (void *)uap->itv, (void *)uap->oitv);
1121#endif
1122
1123	if (uap->itv == NULL) {
1124		uap->itv = uap->oitv;
1125		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
1126	}
1127
1128	error = copyin(uap->itv, &ls, sizeof(ls));
1129	if (error != 0)
1130		return (error);
1131	B2L_ITIMERVAL(&aitv, &ls);
1132#ifdef DEBUG
1133	if (ldebug(setitimer)) {
1134		printf("setitimer: value: sec: %jd, usec: %ld\n",
1135		    (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec);
1136		printf("setitimer: interval: sec: %jd, usec: %ld\n",
1137		    (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec);
1138	}
1139#endif
1140	error = kern_setitimer(td, uap->which, &aitv, &oitv);
1141	if (error != 0 || uap->oitv == NULL)
1142		return (error);
1143	B2L_ITIMERVAL(&ls, &oitv);
1144
1145	return (copyout(&ls, uap->oitv, sizeof(ls)));
1146}
1147
1148int
1149linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
1150{
1151	int error;
1152	struct l_itimerval ls;
1153	struct itimerval aitv;
1154
1155#ifdef DEBUG
1156	if (ldebug(getitimer))
1157		printf(ARGS(getitimer, "%p"), (void *)uap->itv);
1158#endif
1159	error = kern_getitimer(td, uap->which, &aitv);
1160	if (error != 0)
1161		return (error);
1162	B2L_ITIMERVAL(&ls, &aitv);
1163	return (copyout(&ls, uap->itv, sizeof(ls)));
1164}
1165
1166int
1167linux_nice(struct thread *td, struct linux_nice_args *args)
1168{
1169	struct setpriority_args bsd_args;
1170
1171	bsd_args.which = PRIO_PROCESS;
1172	bsd_args.who = 0;		/* current process */
1173	bsd_args.prio = args->inc;
1174	return (sys_setpriority(td, &bsd_args));
1175}
1176
1177int
1178linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
1179{
1180	struct ucred *newcred, *oldcred;
1181	l_gid_t *linux_gidset;
1182	gid_t *bsd_gidset;
1183	int ngrp, error;
1184	struct proc *p;
1185
1186	ngrp = args->gidsetsize;
1187	if (ngrp < 0 || ngrp >= ngroups_max + 1)
1188		return (EINVAL);
1189	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK);
1190	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
1191	if (error)
1192		goto out;
1193	newcred = crget();
1194	p = td->td_proc;
1195	PROC_LOCK(p);
1196	oldcred = crcopysafe(p, newcred);
1197
1198	/*
1199	 * cr_groups[0] holds egid. Setting the whole set from
1200	 * the supplied set will cause egid to be changed too.
1201	 * Keep cr_groups[0] unchanged to prevent that.
1202	 */
1203
1204	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) {
1205		PROC_UNLOCK(p);
1206		crfree(newcred);
1207		goto out;
1208	}
1209
1210	if (ngrp > 0) {
1211		newcred->cr_ngroups = ngrp + 1;
1212
1213		bsd_gidset = newcred->cr_groups;
1214		ngrp--;
1215		while (ngrp >= 0) {
1216			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1217			ngrp--;
1218		}
1219	} else
1220		newcred->cr_ngroups = 1;
1221
1222	setsugid(p);
1223	p->p_ucred = newcred;
1224	PROC_UNLOCK(p);
1225	crfree(oldcred);
1226	error = 0;
1227out:
1228	free(linux_gidset, M_TEMP);
1229	return (error);
1230}
1231
1232int
1233linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1234{
1235	struct ucred *cred;
1236	l_gid_t *linux_gidset;
1237	gid_t *bsd_gidset;
1238	int bsd_gidsetsz, ngrp, error;
1239
1240	cred = td->td_ucred;
1241	bsd_gidset = cred->cr_groups;
1242	bsd_gidsetsz = cred->cr_ngroups - 1;
1243
1244	/*
1245	 * cr_groups[0] holds egid. Returning the whole set
1246	 * here will cause a duplicate. Exclude cr_groups[0]
1247	 * to prevent that.
1248	 */
1249
1250	if ((ngrp = args->gidsetsize) == 0) {
1251		td->td_retval[0] = bsd_gidsetsz;
1252		return (0);
1253	}
1254
1255	if (ngrp < bsd_gidsetsz)
1256		return (EINVAL);
1257
1258	ngrp = 0;
1259	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
1260	    M_TEMP, M_WAITOK);
1261	while (ngrp < bsd_gidsetsz) {
1262		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1263		ngrp++;
1264	}
1265
1266	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
1267	free(linux_gidset, M_TEMP);
1268	if (error)
1269		return (error);
1270
1271	td->td_retval[0] = ngrp;
1272	return (0);
1273}
1274
1275int
1276linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1277{
1278	struct rlimit bsd_rlim;
1279	struct l_rlimit rlim;
1280	u_int which;
1281	int error;
1282
1283#ifdef DEBUG
1284	if (ldebug(setrlimit))
1285		printf(ARGS(setrlimit, "%d, %p"),
1286		    args->resource, (void *)args->rlim);
1287#endif
1288
1289	if (args->resource >= LINUX_RLIM_NLIMITS)
1290		return (EINVAL);
1291
1292	which = linux_to_bsd_resource[args->resource];
1293	if (which == -1)
1294		return (EINVAL);
1295
1296	error = copyin(args->rlim, &rlim, sizeof(rlim));
1297	if (error)
1298		return (error);
1299
1300	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1301	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1302	return (kern_setrlimit(td, which, &bsd_rlim));
1303}
1304
1305int
1306linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1307{
1308	struct l_rlimit rlim;
1309	struct proc *p = td->td_proc;
1310	struct rlimit bsd_rlim;
1311	u_int which;
1312
1313#ifdef DEBUG
1314	if (ldebug(old_getrlimit))
1315		printf(ARGS(old_getrlimit, "%d, %p"),
1316		    args->resource, (void *)args->rlim);
1317#endif
1318
1319	if (args->resource >= LINUX_RLIM_NLIMITS)
1320		return (EINVAL);
1321
1322	which = linux_to_bsd_resource[args->resource];
1323	if (which == -1)
1324		return (EINVAL);
1325
1326	PROC_LOCK(p);
1327	lim_rlimit(p, which, &bsd_rlim);
1328	PROC_UNLOCK(p);
1329
1330#ifdef COMPAT_LINUX32
1331	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
1332	if (rlim.rlim_cur == UINT_MAX)
1333		rlim.rlim_cur = INT_MAX;
1334	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
1335	if (rlim.rlim_max == UINT_MAX)
1336		rlim.rlim_max = INT_MAX;
1337#else
1338	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
1339	if (rlim.rlim_cur == ULONG_MAX)
1340		rlim.rlim_cur = LONG_MAX;
1341	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
1342	if (rlim.rlim_max == ULONG_MAX)
1343		rlim.rlim_max = LONG_MAX;
1344#endif
1345	return (copyout(&rlim, args->rlim, sizeof(rlim)));
1346}
1347
1348int
1349linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1350{
1351	struct l_rlimit rlim;
1352	struct proc *p = td->td_proc;
1353	struct rlimit bsd_rlim;
1354	u_int which;
1355
1356#ifdef DEBUG
1357	if (ldebug(getrlimit))
1358		printf(ARGS(getrlimit, "%d, %p"),
1359		    args->resource, (void *)args->rlim);
1360#endif
1361
1362	if (args->resource >= LINUX_RLIM_NLIMITS)
1363		return (EINVAL);
1364
1365	which = linux_to_bsd_resource[args->resource];
1366	if (which == -1)
1367		return (EINVAL);
1368
1369	PROC_LOCK(p);
1370	lim_rlimit(p, which, &bsd_rlim);
1371	PROC_UNLOCK(p);
1372
1373	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1374	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1375	return (copyout(&rlim, args->rlim, sizeof(rlim)));
1376}
1377
1378int
1379linux_sched_setscheduler(struct thread *td,
1380    struct linux_sched_setscheduler_args *args)
1381{
1382	struct sched_param sched_param;
1383	struct thread *tdt;
1384	int error, policy;
1385
1386#ifdef DEBUG
1387	if (ldebug(sched_setscheduler))
1388		printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1389		    args->pid, args->policy, (const void *)args->param);
1390#endif
1391
1392	switch (args->policy) {
1393	case LINUX_SCHED_OTHER:
1394		policy = SCHED_OTHER;
1395		break;
1396	case LINUX_SCHED_FIFO:
1397		policy = SCHED_FIFO;
1398		break;
1399	case LINUX_SCHED_RR:
1400		policy = SCHED_RR;
1401		break;
1402	default:
1403		return (EINVAL);
1404	}
1405
1406	error = copyin(args->param, &sched_param, sizeof(sched_param));
1407	if (error)
1408		return (error);
1409
1410	tdt = linux_tdfind(td, args->pid, -1);
1411	if (tdt == NULL)
1412		return (ESRCH);
1413
1414	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
1415	PROC_UNLOCK(tdt->td_proc);
1416	return (error);
1417}
1418
1419int
1420linux_sched_getscheduler(struct thread *td,
1421    struct linux_sched_getscheduler_args *args)
1422{
1423	struct thread *tdt;
1424	int error, policy;
1425
1426#ifdef DEBUG
1427	if (ldebug(sched_getscheduler))
1428		printf(ARGS(sched_getscheduler, "%d"), args->pid);
1429#endif
1430
1431	tdt = linux_tdfind(td, args->pid, -1);
1432	if (tdt == NULL)
1433		return (ESRCH);
1434
1435	error = kern_sched_getscheduler(td, tdt, &policy);
1436	PROC_UNLOCK(tdt->td_proc);
1437
1438	switch (policy) {
1439	case SCHED_OTHER:
1440		td->td_retval[0] = LINUX_SCHED_OTHER;
1441		break;
1442	case SCHED_FIFO:
1443		td->td_retval[0] = LINUX_SCHED_FIFO;
1444		break;
1445	case SCHED_RR:
1446		td->td_retval[0] = LINUX_SCHED_RR;
1447		break;
1448	}
1449	return (error);
1450}
1451
1452int
1453linux_sched_get_priority_max(struct thread *td,
1454    struct linux_sched_get_priority_max_args *args)
1455{
1456	struct sched_get_priority_max_args bsd;
1457
1458#ifdef DEBUG
1459	if (ldebug(sched_get_priority_max))
1460		printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1461#endif
1462
1463	switch (args->policy) {
1464	case LINUX_SCHED_OTHER:
1465		bsd.policy = SCHED_OTHER;
1466		break;
1467	case LINUX_SCHED_FIFO:
1468		bsd.policy = SCHED_FIFO;
1469		break;
1470	case LINUX_SCHED_RR:
1471		bsd.policy = SCHED_RR;
1472		break;
1473	default:
1474		return (EINVAL);
1475	}
1476	return (sys_sched_get_priority_max(td, &bsd));
1477}
1478
1479int
1480linux_sched_get_priority_min(struct thread *td,
1481    struct linux_sched_get_priority_min_args *args)
1482{
1483	struct sched_get_priority_min_args bsd;
1484
1485#ifdef DEBUG
1486	if (ldebug(sched_get_priority_min))
1487		printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1488#endif
1489
1490	switch (args->policy) {
1491	case LINUX_SCHED_OTHER:
1492		bsd.policy = SCHED_OTHER;
1493		break;
1494	case LINUX_SCHED_FIFO:
1495		bsd.policy = SCHED_FIFO;
1496		break;
1497	case LINUX_SCHED_RR:
1498		bsd.policy = SCHED_RR;
1499		break;
1500	default:
1501		return (EINVAL);
1502	}
1503	return (sys_sched_get_priority_min(td, &bsd));
1504}
1505
1506#define REBOOT_CAD_ON	0x89abcdef
1507#define REBOOT_CAD_OFF	0
1508#define REBOOT_HALT	0xcdef0123
1509#define REBOOT_RESTART	0x01234567
1510#define REBOOT_RESTART2	0xA1B2C3D4
1511#define REBOOT_POWEROFF	0x4321FEDC
1512#define REBOOT_MAGIC1	0xfee1dead
1513#define REBOOT_MAGIC2	0x28121969
1514#define REBOOT_MAGIC2A	0x05121996
1515#define REBOOT_MAGIC2B	0x16041998
1516
1517int
1518linux_reboot(struct thread *td, struct linux_reboot_args *args)
1519{
1520	struct reboot_args bsd_args;
1521
1522#ifdef DEBUG
1523	if (ldebug(reboot))
1524		printf(ARGS(reboot, "0x%x"), args->cmd);
1525#endif
1526
1527	if (args->magic1 != REBOOT_MAGIC1)
1528		return (EINVAL);
1529
1530	switch (args->magic2) {
1531	case REBOOT_MAGIC2:
1532	case REBOOT_MAGIC2A:
1533	case REBOOT_MAGIC2B:
1534		break;
1535	default:
1536		return (EINVAL);
1537	}
1538
1539	switch (args->cmd) {
1540	case REBOOT_CAD_ON:
1541	case REBOOT_CAD_OFF:
1542		return (priv_check(td, PRIV_REBOOT));
1543	case REBOOT_HALT:
1544		bsd_args.opt = RB_HALT;
1545		break;
1546	case REBOOT_RESTART:
1547	case REBOOT_RESTART2:
1548		bsd_args.opt = 0;
1549		break;
1550	case REBOOT_POWEROFF:
1551		bsd_args.opt = RB_POWEROFF;
1552		break;
1553	default:
1554		return (EINVAL);
1555	}
1556	return (sys_reboot(td, &bsd_args));
1557}
1558
1559
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that
 * are assumed to be preserved. The following lightweight syscalls fix
 * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1570
1571int
1572linux_getpid(struct thread *td, struct linux_getpid_args *args)
1573{
1574
1575#ifdef DEBUG
1576	if (ldebug(getpid))
1577		printf(ARGS(getpid, ""));
1578#endif
1579	td->td_retval[0] = td->td_proc->p_pid;
1580
1581	return (0);
1582}
1583
1584int
1585linux_gettid(struct thread *td, struct linux_gettid_args *args)
1586{
1587	struct linux_emuldata *em;
1588
1589#ifdef DEBUG
1590	if (ldebug(gettid))
1591		printf(ARGS(gettid, ""));
1592#endif
1593
1594	em = em_find(td);
1595	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));
1596
1597	td->td_retval[0] = em->em_tid;
1598
1599	return (0);
1600}
1601
1602
1603int
1604linux_getppid(struct thread *td, struct linux_getppid_args *args)
1605{
1606
1607#ifdef DEBUG
1608	if (ldebug(getppid))
1609		printf(ARGS(getppid, ""));
1610#endif
1611
1612	PROC_LOCK(td->td_proc);
1613	td->td_retval[0] = td->td_proc->p_pptr->p_pid;
1614	PROC_UNLOCK(td->td_proc);
1615	return (0);
1616}
1617
1618int
1619linux_getgid(struct thread *td, struct linux_getgid_args *args)
1620{
1621
1622#ifdef DEBUG
1623	if (ldebug(getgid))
1624		printf(ARGS(getgid, ""));
1625#endif
1626
1627	td->td_retval[0] = td->td_ucred->cr_rgid;
1628	return (0);
1629}
1630
1631int
1632linux_getuid(struct thread *td, struct linux_getuid_args *args)
1633{
1634
1635#ifdef DEBUG
1636	if (ldebug(getuid))
1637		printf(ARGS(getuid, ""));
1638#endif
1639
1640	td->td_retval[0] = td->td_ucred->cr_ruid;
1641	return (0);
1642}
1643
1644
1645int
1646linux_getsid(struct thread *td, struct linux_getsid_args *args)
1647{
1648	struct getsid_args bsd;
1649
1650#ifdef DEBUG
1651	if (ldebug(getsid))
1652		printf(ARGS(getsid, "%i"), args->pid);
1653#endif
1654
1655	bsd.pid = args->pid;
1656	return (sys_getsid(td, &bsd));
1657}
1658
1659int
1660linux_nosys(struct thread *td, struct nosys_args *ignore)
1661{
1662
1663	return (ENOSYS);
1664}
1665
1666int
1667linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
1668{
1669	struct getpriority_args bsd_args;
1670	int error;
1671
1672#ifdef DEBUG
1673	if (ldebug(getpriority))
1674		printf(ARGS(getpriority, "%i, %i"), args->which, args->who);
1675#endif
1676
1677	bsd_args.which = args->which;
1678	bsd_args.who = args->who;
1679	error = sys_getpriority(td, &bsd_args);
1680	td->td_retval[0] = 20 - td->td_retval[0];
1681	return (error);
1682}
1683
1684int
1685linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
1686{
1687	int name[2];
1688
1689#ifdef DEBUG
1690	if (ldebug(sethostname))
1691		printf(ARGS(sethostname, "*, %i"), args->len);
1692#endif
1693
1694	name[0] = CTL_KERN;
1695	name[1] = KERN_HOSTNAME;
1696	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
1697	    args->len, 0, 0));
1698}
1699
1700int
1701linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
1702{
1703	int name[2];
1704
1705#ifdef DEBUG
1706	if (ldebug(setdomainname))
1707		printf(ARGS(setdomainname, "*, %i"), args->len);
1708#endif
1709
1710	name[0] = CTL_KERN;
1711	name[1] = KERN_NISDOMAINNAME;
1712	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
1713	    args->len, 0, 0));
1714}
1715
1716int
1717linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
1718{
1719
1720#ifdef DEBUG
1721	if (ldebug(exit_group))
1722		printf(ARGS(exit_group, "%i"), args->error_code);
1723#endif
1724
1725	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
1726	    args->error_code);
1727
1728	/*
1729	 * XXX: we should send a signal to the parent if
1730	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
1731	 * as it doesnt occur often.
1732	 */
1733	exit1(td, W_EXITCODE(args->error_code, 0));
1734		/* NOTREACHED */
1735}
1736
1737#define _LINUX_CAPABILITY_VERSION  0x19980330
1738
1739struct l_user_cap_header {
1740	l_int	version;
1741	l_int	pid;
1742};
1743
1744struct l_user_cap_data {
1745	l_int	effective;
1746	l_int	permitted;
1747	l_int	inheritable;
1748};
1749
1750int
1751linux_capget(struct thread *td, struct linux_capget_args *args)
1752{
1753	struct l_user_cap_header luch;
1754	struct l_user_cap_data lucd;
1755	int error;
1756
1757	if (args->hdrp == NULL)
1758		return (EFAULT);
1759
1760	error = copyin(args->hdrp, &luch, sizeof(luch));
1761	if (error != 0)
1762		return (error);
1763
1764	if (luch.version != _LINUX_CAPABILITY_VERSION) {
1765		luch.version = _LINUX_CAPABILITY_VERSION;
1766		error = copyout(&luch, args->hdrp, sizeof(luch));
1767		if (error)
1768			return (error);
1769		return (EINVAL);
1770	}
1771
1772	if (luch.pid)
1773		return (EPERM);
1774
1775	if (args->datap) {
1776		/*
1777		 * The current implementation doesn't support setting
1778		 * a capability (it's essentially a stub) so indicate
1779		 * that no capabilities are currently set or available
1780		 * to request.
1781		 */
1782		bzero (&lucd, sizeof(lucd));
1783		error = copyout(&lucd, args->datap, sizeof(lucd));
1784	}
1785
1786	return (error);
1787}
1788
1789int
1790linux_capset(struct thread *td, struct linux_capset_args *args)
1791{
1792	struct l_user_cap_header luch;
1793	struct l_user_cap_data lucd;
1794	int error;
1795
1796	if (args->hdrp == NULL || args->datap == NULL)
1797		return (EFAULT);
1798
1799	error = copyin(args->hdrp, &luch, sizeof(luch));
1800	if (error != 0)
1801		return (error);
1802
1803	if (luch.version != _LINUX_CAPABILITY_VERSION) {
1804		luch.version = _LINUX_CAPABILITY_VERSION;
1805		error = copyout(&luch, args->hdrp, sizeof(luch));
1806		if (error)
1807			return (error);
1808		return (EINVAL);
1809	}
1810
1811	if (luch.pid)
1812		return (EPERM);
1813
1814	error = copyin(args->datap, &lucd, sizeof(lucd));
1815	if (error != 0)
1816		return (error);
1817
1818	/* We currently don't support setting any capabilities. */
1819	if (lucd.effective || lucd.permitted || lucd.inheritable) {
1820		linux_msg(td,
1821			  "capset effective=0x%x, permitted=0x%x, "
1822			  "inheritable=0x%x is not implemented",
1823			  (int)lucd.effective, (int)lucd.permitted,
1824			  (int)lucd.inheritable);
1825		return (EPERM);
1826	}
1827
1828	return (0);
1829}
1830
1831int
1832linux_prctl(struct thread *td, struct linux_prctl_args *args)
1833{
1834	int error = 0, max_size;
1835	struct proc *p = td->td_proc;
1836	char comm[LINUX_MAX_COMM_LEN];
1837	struct linux_emuldata *em;
1838	int pdeath_signal;
1839
1840#ifdef DEBUG
1841	if (ldebug(prctl))
1842		printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option,
1843		    args->arg2, args->arg3, args->arg4, args->arg5);
1844#endif
1845
1846	switch (args->option) {
1847	case LINUX_PR_SET_PDEATHSIG:
1848		if (!LINUX_SIG_VALID(args->arg2))
1849			return (EINVAL);
1850		em = em_find(td);
1851		KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
1852		em->pdeath_signal = args->arg2;
1853		break;
1854	case LINUX_PR_GET_PDEATHSIG:
1855		em = em_find(td);
1856		KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
1857		pdeath_signal = em->pdeath_signal;
1858		error = copyout(&pdeath_signal,
1859		    (void *)(register_t)args->arg2,
1860		    sizeof(pdeath_signal));
1861		break;
1862	case LINUX_PR_GET_KEEPCAPS:
1863		/*
1864		 * Indicate that we always clear the effective and
1865		 * permitted capability sets when the user id becomes
1866		 * non-zero (actually the capability sets are simply
1867		 * always zero in the current implementation).
1868		 */
1869		td->td_retval[0] = 0;
1870		break;
1871	case LINUX_PR_SET_KEEPCAPS:
1872		/*
1873		 * Ignore requests to keep the effective and permitted
1874		 * capability sets when the user id becomes non-zero.
1875		 */
1876		break;
1877	case LINUX_PR_SET_NAME:
1878		/*
1879		 * To be on the safe side we need to make sure to not
1880		 * overflow the size a linux program expects. We already
1881		 * do this here in the copyin, so that we don't need to
1882		 * check on copyout.
1883		 */
1884		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
1885		error = copyinstr((void *)(register_t)args->arg2, comm,
1886		    max_size, NULL);
1887
1888		/* Linux silently truncates the name if it is too long. */
1889		if (error == ENAMETOOLONG) {
1890			/*
1891			 * XXX: copyinstr() isn't documented to populate the
1892			 * array completely, so do a copyin() to be on the
1893			 * safe side. This should be changed in case
1894			 * copyinstr() is changed to guarantee this.
1895			 */
1896			error = copyin((void *)(register_t)args->arg2, comm,
1897			    max_size - 1);
1898			comm[max_size - 1] = '\0';
1899		}
1900		if (error)
1901			return (error);
1902
1903		PROC_LOCK(p);
1904		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
1905		PROC_UNLOCK(p);
1906		break;
1907	case LINUX_PR_GET_NAME:
1908		PROC_LOCK(p);
1909		strlcpy(comm, p->p_comm, sizeof(comm));
1910		PROC_UNLOCK(p);
1911		error = copyout(comm, (void *)(register_t)args->arg2,
1912		    strlen(comm) + 1);
1913		break;
1914	default:
1915		error = EINVAL;
1916		break;
1917	}
1918
1919	return (error);
1920}
1921
1922int
1923linux_sched_setparam(struct thread *td,
1924    struct linux_sched_setparam_args *uap)
1925{
1926	struct sched_param sched_param;
1927	struct thread *tdt;
1928	int error;
1929
1930#ifdef DEBUG
1931	if (ldebug(sched_setparam))
1932		printf(ARGS(sched_setparam, "%d, *"), uap->pid);
1933#endif
1934
1935	error = copyin(uap->param, &sched_param, sizeof(sched_param));
1936	if (error)
1937		return (error);
1938
1939	tdt = linux_tdfind(td, uap->pid, -1);
1940	if (tdt == NULL)
1941		return (ESRCH);
1942
1943	error = kern_sched_setparam(td, tdt, &sched_param);
1944	PROC_UNLOCK(tdt->td_proc);
1945	return (error);
1946}
1947
1948int
1949linux_sched_getparam(struct thread *td,
1950    struct linux_sched_getparam_args *uap)
1951{
1952	struct sched_param sched_param;
1953	struct thread *tdt;
1954	int error;
1955
1956#ifdef DEBUG
1957	if (ldebug(sched_getparam))
1958		printf(ARGS(sched_getparam, "%d, *"), uap->pid);
1959#endif
1960
1961	tdt = linux_tdfind(td, uap->pid, -1);
1962	if (tdt == NULL)
1963		return (ESRCH);
1964
1965	error = kern_sched_getparam(td, tdt, &sched_param);
1966	PROC_UNLOCK(tdt->td_proc);
1967	if (error == 0)
1968		error = copyout(&sched_param, uap->param,
1969		    sizeof(sched_param));
1970	return (error);
1971}
1972
1973/*
1974 * Get affinity of a process.
1975 */
1976int
1977linux_sched_getaffinity(struct thread *td,
1978    struct linux_sched_getaffinity_args *args)
1979{
1980	int error;
1981	struct thread *tdt;
1982	struct cpuset_getaffinity_args cga;
1983
1984#ifdef DEBUG
1985	if (ldebug(sched_getaffinity))
1986		printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid,
1987		    args->len);
1988#endif
1989	if (args->len < sizeof(cpuset_t))
1990		return (EINVAL);
1991
1992	tdt = linux_tdfind(td, args->pid, -1);
1993	if (tdt == NULL)
1994		return (ESRCH);
1995
1996	PROC_UNLOCK(tdt->td_proc);
1997	cga.level = CPU_LEVEL_WHICH;
1998	cga.which = CPU_WHICH_TID;
1999	cga.id = tdt->td_tid;
2000	cga.cpusetsize = sizeof(cpuset_t);
2001	cga.mask = (cpuset_t *) args->user_mask_ptr;
2002
2003	if ((error = sys_cpuset_getaffinity(td, &cga)) == 0)
2004		td->td_retval[0] = sizeof(cpuset_t);
2005
2006	return (error);
2007}
2008
2009/*
2010 *  Set affinity of a process.
2011 */
2012int
2013linux_sched_setaffinity(struct thread *td,
2014    struct linux_sched_setaffinity_args *args)
2015{
2016	struct cpuset_setaffinity_args csa;
2017	struct thread *tdt;
2018
2019#ifdef DEBUG
2020	if (ldebug(sched_setaffinity))
2021		printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid,
2022		    args->len);
2023#endif
2024	if (args->len < sizeof(cpuset_t))
2025		return (EINVAL);
2026
2027	tdt = linux_tdfind(td, args->pid, -1);
2028	if (tdt == NULL)
2029		return (ESRCH);
2030
2031	PROC_UNLOCK(tdt->td_proc);
2032	csa.level = CPU_LEVEL_WHICH;
2033	csa.which = CPU_WHICH_TID;
2034	csa.id = tdt->td_tid;
2035	csa.cpusetsize = sizeof(cpuset_t);
2036	csa.mask = (cpuset_t *) args->user_mask_ptr;
2037
2038	return (sys_cpuset_setaffinity(td, &csa));
2039}
2040
2041struct linux_rlimit64 {
2042	uint64_t	rlim_cur;
2043	uint64_t	rlim_max;
2044};
2045
2046int
2047linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
2048{
2049	struct rlimit rlim, nrlim;
2050	struct linux_rlimit64 lrlim;
2051	struct proc *p;
2052	u_int which;
2053	int flags;
2054	int error;
2055
2056#ifdef DEBUG
2057	if (ldebug(prlimit64))
2058		printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid,
2059		    args->resource, (void *)args->new, (void *)args->old);
2060#endif
2061
2062	if (args->resource >= LINUX_RLIM_NLIMITS)
2063		return (EINVAL);
2064
2065	which = linux_to_bsd_resource[args->resource];
2066	if (which == -1)
2067		return (EINVAL);
2068
2069	if (args->new != NULL) {
2070		/*
2071		 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
2072		 * rlim is unsigned 64-bit. FreeBSD treats negative limits
2073		 * as INFINITY so we do not need a conversion even.
2074		 */
2075		error = copyin(args->new, &nrlim, sizeof(nrlim));
2076		if (error != 0)
2077			return (error);
2078	}
2079
2080	flags = PGET_HOLD | PGET_NOTWEXIT;
2081	if (args->new != NULL)
2082		flags |= PGET_CANDEBUG;
2083	else
2084		flags |= PGET_CANSEE;
2085	error = pget(args->pid, flags, &p);
2086	if (error != 0)
2087		return (error);
2088
2089	if (args->old != NULL) {
2090		PROC_LOCK(p);
2091		lim_rlimit(p, which, &rlim);
2092		PROC_UNLOCK(p);
2093		if (rlim.rlim_cur == RLIM_INFINITY)
2094			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
2095		else
2096			lrlim.rlim_cur = rlim.rlim_cur;
2097		if (rlim.rlim_max == RLIM_INFINITY)
2098			lrlim.rlim_max = LINUX_RLIM_INFINITY;
2099		else
2100			lrlim.rlim_max = rlim.rlim_max;
2101		error = copyout(&lrlim, args->old, sizeof(lrlim));
2102		if (error != 0)
2103			goto out;
2104	}
2105
2106	if (args->new != NULL)
2107		error = kern_proc_setrlimit(td, p, which, &nrlim);
2108
2109 out:
2110	PRELE(p);
2111	return (error);
2112}
2113
2114int
2115linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
2116{
2117	struct timeval utv, tv0, tv1, *tvp;
2118	struct l_pselect6arg lpse6;
2119	struct l_timespec lts;
2120	struct timespec uts;
2121	l_sigset_t l_ss;
2122	sigset_t *ssp;
2123	sigset_t ss;
2124	int error;
2125
2126	ssp = NULL;
2127	if (args->sig != NULL) {
2128		error = copyin(args->sig, &lpse6, sizeof(lpse6));
2129		if (error != 0)
2130			return (error);
2131		if (lpse6.ss_len != sizeof(l_ss))
2132			return (EINVAL);
2133		if (lpse6.ss != 0) {
2134			error = copyin(PTRIN(lpse6.ss), &l_ss,
2135			    sizeof(l_ss));
2136			if (error != 0)
2137				return (error);
2138			linux_to_bsd_sigset(&l_ss, &ss);
2139			ssp = &ss;
2140		}
2141	}
2142
2143	/*
2144	 * Currently glibc changes nanosecond number to microsecond.
2145	 * This mean losing precision but for now it is hardly seen.
2146	 */
2147	if (args->tsp != NULL) {
2148		error = copyin(args->tsp, &lts, sizeof(lts));
2149		if (error != 0)
2150			return (error);
2151		uts.tv_sec = lts.tv_sec;
2152		uts.tv_nsec = lts.tv_nsec;
2153
2154		TIMESPEC_TO_TIMEVAL(&utv, &uts);
2155		if (itimerfix(&utv))
2156			return (EINVAL);
2157
2158		microtime(&tv0);
2159		tvp = &utv;
2160	} else
2161		tvp = NULL;
2162
2163	error = kern_pselect(td, args->nfds, args->readfds, args->writefds,
2164	    args->exceptfds, tvp, ssp, sizeof(l_int) * 8);
2165
2166	if (error == 0 && args->tsp != NULL) {
2167		if (td->td_retval[0] != 0) {
2168			/*
2169			 * Compute how much time was left of the timeout,
2170			 * by subtracting the current time and the time
2171			 * before we started the call, and subtracting
2172			 * that result from the user-supplied value.
2173			 */
2174
2175			microtime(&tv1);
2176			timevalsub(&tv1, &tv0);
2177			timevalsub(&utv, &tv1);
2178			if (utv.tv_sec < 0)
2179				timevalclear(&utv);
2180		} else
2181			timevalclear(&utv);
2182
2183		TIMEVAL_TO_TIMESPEC(&utv, &uts);
2184		lts.tv_sec = uts.tv_sec;
2185		lts.tv_nsec = uts.tv_nsec;
2186		error = copyout(&lts, args->tsp, sizeof(lts));
2187	}
2188
2189	return (error);
2190}
2191
2192int
2193linux_sched_rr_get_interval(struct thread *td,
2194    struct linux_sched_rr_get_interval_args *uap)
2195{
2196	struct timespec ts;
2197	struct l_timespec lts;
2198	struct thread *tdt;
2199	int error;
2200
2201	/*
2202	 * According to man in case the invalid pid specified
2203	 * EINVAL should be returned.
2204	 */
2205	if (uap->pid < 0)
2206		return (EINVAL);
2207
2208	tdt = linux_tdfind(td, uap->pid, -1);
2209	if (tdt == NULL)
2210		return (ESRCH);
2211
2212	error = kern_sched_rr_get_interval_td(td, tdt, &ts);
2213	PROC_UNLOCK(tdt->td_proc);
2214	if (error != 0)
2215		return (error);
2216	lts.tv_sec = ts.tv_sec;
2217	lts.tv_nsec = ts.tv_nsec;
2218	return (copyout(&lts, uap->interval, sizeof(lts)));
2219}
2220
2221/*
2222 * In case when the Linux thread is the initial thread in
2223 * the thread group thread id is equal to the process id.
2224 * Glibc depends on this magic (assert in pthread_getattr_np.c).
2225 */
2226struct thread *
2227linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
2228{
2229	struct linux_emuldata *em;
2230	struct thread *tdt;
2231	struct proc *p;
2232
2233	tdt = NULL;
2234	if (tid == 0 || tid == td->td_tid) {
2235		tdt = td;
2236		PROC_LOCK(tdt->td_proc);
2237	} else if (tid > PID_MAX)
2238		tdt = tdfind(tid, pid);
2239	else {
2240		/*
2241		 * Initial thread where the tid equal to the pid.
2242		 */
2243		p = pfind(tid);
2244		if (p != NULL) {
2245			if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
2246				/*
2247				 * p is not a Linuxulator process.
2248				 */
2249				PROC_UNLOCK(p);
2250				return (NULL);
2251			}
2252			FOREACH_THREAD_IN_PROC(p, tdt) {
2253				em = em_find(tdt);
2254				if (tid == em->em_tid)
2255					return (tdt);
2256			}
2257			PROC_UNLOCK(p);
2258		}
2259		return (NULL);
2260	}
2261
2262	return (tdt);
2263}
2264
2265void
2266linux_to_bsd_waitopts(int options, int *bsdopts)
2267{
2268
2269	if (options & LINUX_WNOHANG)
2270		*bsdopts |= WNOHANG;
2271	if (options & LINUX_WUNTRACED)
2272		*bsdopts |= WUNTRACED;
2273	if (options & LINUX_WEXITED)
2274		*bsdopts |= WEXITED;
2275	if (options & LINUX_WCONTINUED)
2276		*bsdopts |= WCONTINUED;
2277	if (options & LINUX_WNOWAIT)
2278		*bsdopts |= WNOWAIT;
2279
2280	if (options & __WCLONE)
2281		*bsdopts |= WLINUXCLONE;
2282}
2283