1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
5 * Copyright (c) 2003-2005 SPARTA, Inc.
6 * Copyright (c) 2005, 2016-2017 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * This software was developed for the FreeBSD Project in part by Network
10 * Associates Laboratories, the Security Research Division of Network
11 * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
12 * as part of the DARPA CHATS research program.
13 *
14 * Portions of this software were developed by BAE Systems, the University of
15 * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
16 * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
17 * Computing (TC) research program.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 */
40
41#include <sys/cdefs.h>
42#include "opt_posix.h"
43
44#include <sys/param.h>
45#include <sys/capsicum.h>
46#include <sys/condvar.h>
47#include <sys/fcntl.h>
48#include <sys/file.h>
49#include <sys/filedesc.h>
50#include <sys/fnv_hash.h>
51#include <sys/jail.h>
52#include <sys/kernel.h>
53#include <sys/ksem.h>
54#include <sys/lock.h>
55#include <sys/malloc.h>
56#include <sys/module.h>
57#include <sys/mutex.h>
58#include <sys/priv.h>
59#include <sys/proc.h>
60#include <sys/posix4.h>
61#include <sys/_semaphore.h>
62#include <sys/stat.h>
63#include <sys/syscall.h>
64#include <sys/syscallsubr.h>
65#include <sys/sysctl.h>
66#include <sys/sysent.h>
67#include <sys/sysproto.h>
68#include <sys/systm.h>
69#include <sys/sx.h>
70#include <sys/user.h>
71#include <sys/vnode.h>
72
73#include <security/audit/audit.h>
74#include <security/mac/mac_framework.h>
75
76FEATURE(p1003_1b_semaphores, "POSIX P1003.1B semaphores support");
77/*
78 * TODO
79 *
80 * - Resource limits?
81 * - Replace global sem_lock with mtx_pool locks?
82 * - Add a MAC check_create() hook for creating new named semaphores.
83 */
84
/* Default semaphore limit; a build may predefine SEM_MAX to override it. */
#if !defined(SEM_MAX)
#define	SEM_MAX	30
#endif

/*
 * DP((fmt, ...)) is a debug trace: it expands to a printf call on the
 * parenthesised argument list when SEM_DEBUG is defined, and to nothing
 * otherwise.
 */
#if defined(SEM_DEBUG)
#define	DP(x)	printf x
#else
#define	DP(x)
#endif
94
95struct ksem_mapping {
96	char		*km_path;
97	Fnv32_t		km_fnv;
98	struct ksem	*km_ksem;
99	LIST_ENTRY(ksem_mapping) km_link;
100};
101
102static MALLOC_DEFINE(M_KSEM, "ksem", "semaphore file descriptor");
103static LIST_HEAD(, ksem_mapping) *ksem_dictionary;
104static struct sx ksem_dict_lock;
105static struct mtx ksem_count_lock;
106static struct mtx sem_lock;
107static u_long ksem_hash;
108static int ksem_dead;
109
110#define	KSEM_HASH(fnv)	(&ksem_dictionary[(fnv) & ksem_hash])
111
112static int nsems = 0;
113SYSCTL_DECL(_p1003_1b);
114SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0,
115    "Number of active kernel POSIX semaphores");
116
117static int	kern_sem_wait(struct thread *td, semid_t id, int tryflag,
118		    struct timespec *abstime);
119static int	ksem_access(struct ksem *ks, struct ucred *ucred);
120static struct ksem *ksem_alloc(struct ucred *ucred, mode_t mode,
121		    unsigned int value);
122static int	ksem_create(struct thread *td, const char *path,
123		    semid_t *semidp, mode_t mode, unsigned int value,
124		    int flags, int compat32);
125static void	ksem_drop(struct ksem *ks);
126static int	ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
127    struct file **fpp);
128static struct ksem *ksem_hold(struct ksem *ks);
129static void	ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks);
130static struct ksem *ksem_lookup(char *path, Fnv32_t fnv);
131static void	ksem_module_destroy(void);
132static int	ksem_module_init(void);
133static int	ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
134static int	sem_modload(struct module *module, int cmd, void *arg);
135
136static fo_stat_t	ksem_stat;
137static fo_close_t	ksem_closef;
138static fo_chmod_t	ksem_chmod;
139static fo_chown_t	ksem_chown;
140static fo_fill_kinfo_t	ksem_fill_kinfo;
141
142/* File descriptor operations. */
143static struct fileops ksem_ops = {
144	.fo_read = invfo_rdwr,
145	.fo_write = invfo_rdwr,
146	.fo_truncate = invfo_truncate,
147	.fo_ioctl = invfo_ioctl,
148	.fo_poll = invfo_poll,
149	.fo_kqfilter = invfo_kqfilter,
150	.fo_stat = ksem_stat,
151	.fo_close = ksem_closef,
152	.fo_chmod = ksem_chmod,
153	.fo_chown = ksem_chown,
154	.fo_sendfile = invfo_sendfile,
155	.fo_fill_kinfo = ksem_fill_kinfo,
156	.fo_cmp = file_kcmp_generic,
157	.fo_flags = DFLAG_PASSABLE
158};
159
160FEATURE(posix_sem, "POSIX semaphores");
161
162static int
163ksem_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
164{
165	struct ksem *ks;
166#ifdef MAC
167	int error;
168#endif
169
170	ks = fp->f_data;
171
172#ifdef MAC
173	error = mac_posixsem_check_stat(active_cred, fp->f_cred, ks);
174	if (error)
175		return (error);
176#endif
177
178	/*
179	 * Attempt to return sanish values for fstat() on a semaphore
180	 * file descriptor.
181	 */
182	bzero(sb, sizeof(*sb));
183
184	mtx_lock(&sem_lock);
185	sb->st_atim = ks->ks_atime;
186	sb->st_ctim = ks->ks_ctime;
187	sb->st_mtim = ks->ks_mtime;
188	sb->st_birthtim = ks->ks_birthtime;
189	sb->st_uid = ks->ks_uid;
190	sb->st_gid = ks->ks_gid;
191	sb->st_mode = S_IFREG | ks->ks_mode;		/* XXX */
192	mtx_unlock(&sem_lock);
193
194	return (0);
195}
196
197static int
198ksem_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
199    struct thread *td)
200{
201	struct ksem *ks;
202	int error;
203
204	error = 0;
205	ks = fp->f_data;
206	mtx_lock(&sem_lock);
207#ifdef MAC
208	error = mac_posixsem_check_setmode(active_cred, ks, mode);
209	if (error != 0)
210		goto out;
211#endif
212	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid, VADMIN,
213	    active_cred);
214	if (error != 0)
215		goto out;
216	ks->ks_mode = mode & ACCESSPERMS;
217out:
218	mtx_unlock(&sem_lock);
219	return (error);
220}
221
222static int
223ksem_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
224    struct thread *td)
225{
226	struct ksem *ks;
227	int error;
228
229	error = 0;
230	ks = fp->f_data;
231	mtx_lock(&sem_lock);
232#ifdef MAC
233	error = mac_posixsem_check_setowner(active_cred, ks, uid, gid);
234	if (error != 0)
235		goto out;
236#endif
237	if (uid == (uid_t)-1)
238		uid = ks->ks_uid;
239	if (gid == (gid_t)-1)
240                 gid = ks->ks_gid;
241	if (((uid != ks->ks_uid && uid != active_cred->cr_uid) ||
242	    (gid != ks->ks_gid && !groupmember(gid, active_cred))) &&
243	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN)))
244		goto out;
245	ks->ks_uid = uid;
246	ks->ks_gid = gid;
247out:
248	mtx_unlock(&sem_lock);
249	return (error);
250}
251
252static int
253ksem_closef(struct file *fp, struct thread *td)
254{
255	struct ksem *ks;
256
257	ks = fp->f_data;
258	fp->f_data = NULL;
259	ksem_drop(ks);
260
261	return (0);
262}
263
264static int
265ksem_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
266{
267	const char *path, *pr_path;
268	struct ksem *ks;
269	size_t pr_pathlen;
270
271	kif->kf_type = KF_TYPE_SEM;
272	ks = fp->f_data;
273	mtx_lock(&sem_lock);
274	kif->kf_un.kf_sem.kf_sem_value = ks->ks_value;
275	kif->kf_un.kf_sem.kf_sem_mode = S_IFREG | ks->ks_mode;	/* XXX */
276	mtx_unlock(&sem_lock);
277	if (ks->ks_path != NULL) {
278		sx_slock(&ksem_dict_lock);
279		if (ks->ks_path != NULL) {
280			path = ks->ks_path;
281			pr_path = curthread->td_ucred->cr_prison->pr_path;
282			if (strcmp(pr_path, "/") != 0) {
283				/* Return the jail-rooted pathname. */
284				pr_pathlen = strlen(pr_path);
285				if (strncmp(path, pr_path, pr_pathlen) == 0 &&
286				    path[pr_pathlen] == '/')
287					path += pr_pathlen;
288			}
289			strlcpy(kif->kf_path, path, sizeof(kif->kf_path));
290		}
291		sx_sunlock(&ksem_dict_lock);
292	}
293	return (0);
294}
295
296/*
297 * ksem object management including creation and reference counting
298 * routines.
299 */
300static struct ksem *
301ksem_alloc(struct ucred *ucred, mode_t mode, unsigned int value)
302{
303	struct ksem *ks;
304
305	mtx_lock(&ksem_count_lock);
306	if (nsems == p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX) || ksem_dead) {
307		mtx_unlock(&ksem_count_lock);
308		return (NULL);
309	}
310	nsems++;
311	mtx_unlock(&ksem_count_lock);
312	ks = malloc(sizeof(*ks), M_KSEM, M_WAITOK | M_ZERO);
313	ks->ks_uid = ucred->cr_uid;
314	ks->ks_gid = ucred->cr_gid;
315	ks->ks_mode = mode;
316	ks->ks_value = value;
317	cv_init(&ks->ks_cv, "ksem");
318	vfs_timestamp(&ks->ks_birthtime);
319	ks->ks_atime = ks->ks_mtime = ks->ks_ctime = ks->ks_birthtime;
320	refcount_init(&ks->ks_ref, 1);
321#ifdef MAC
322	mac_posixsem_init(ks);
323	mac_posixsem_create(ucred, ks);
324#endif
325
326	return (ks);
327}
328
329static struct ksem *
330ksem_hold(struct ksem *ks)
331{
332
333	refcount_acquire(&ks->ks_ref);
334	return (ks);
335}
336
337static void
338ksem_drop(struct ksem *ks)
339{
340
341	if (refcount_release(&ks->ks_ref)) {
342#ifdef MAC
343		mac_posixsem_destroy(ks);
344#endif
345		cv_destroy(&ks->ks_cv);
346		free(ks, M_KSEM);
347		mtx_lock(&ksem_count_lock);
348		nsems--;
349		mtx_unlock(&ksem_count_lock);
350	}
351}
352
353/*
354 * Determine if the credentials have sufficient permissions for read
355 * and write access.
356 */
357static int
358ksem_access(struct ksem *ks, struct ucred *ucred)
359{
360	int error;
361
362	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid,
363	    VREAD | VWRITE, ucred);
364	if (error)
365		error = priv_check_cred(ucred, PRIV_SEM_WRITE);
366	return (error);
367}
368
369/*
370 * Dictionary management.  We maintain an in-kernel dictionary to map
371 * paths to semaphore objects.  We use the FNV hash on the path to
372 * store the mappings in a hash table.
373 */
374static struct ksem *
375ksem_lookup(char *path, Fnv32_t fnv)
376{
377	struct ksem_mapping *map;
378
379	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
380		if (map->km_fnv != fnv)
381			continue;
382		if (strcmp(map->km_path, path) == 0)
383			return (map->km_ksem);
384	}
385
386	return (NULL);
387}
388
389static void
390ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks)
391{
392	struct ksem_mapping *map;
393
394	map = malloc(sizeof(struct ksem_mapping), M_KSEM, M_WAITOK);
395	map->km_path = path;
396	map->km_fnv = fnv;
397	map->km_ksem = ksem_hold(ks);
398	ks->ks_path = path;
399	LIST_INSERT_HEAD(KSEM_HASH(fnv), map, km_link);
400}
401
402static int
403ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
404{
405	struct ksem_mapping *map;
406	int error;
407
408	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
409		if (map->km_fnv != fnv)
410			continue;
411		if (strcmp(map->km_path, path) == 0) {
412#ifdef MAC
413			error = mac_posixsem_check_unlink(ucred, map->km_ksem);
414			if (error)
415				return (error);
416#endif
417			error = ksem_access(map->km_ksem, ucred);
418			if (error)
419				return (error);
420			map->km_ksem->ks_path = NULL;
421			LIST_REMOVE(map, km_link);
422			ksem_drop(map->km_ksem);
423			free(map->km_path, M_KSEM);
424			free(map, M_KSEM);
425			return (0);
426		}
427	}
428
429	return (ENOENT);
430}
431
432static int
433ksem_create_copyout_semid(struct thread *td, semid_t *semidp, int fd,
434    int compat32)
435{
436	semid_t semid;
437#ifdef COMPAT_FREEBSD32
438	int32_t semid32;
439#endif
440	void *ptr;
441	size_t ptrs;
442
443#ifdef COMPAT_FREEBSD32
444	if (compat32) {
445		semid32 = fd;
446		ptr = &semid32;
447		ptrs = sizeof(semid32);
448	} else {
449#endif
450		semid = fd;
451		ptr = &semid;
452		ptrs = sizeof(semid);
453		compat32 = 0; /* silence gcc */
454#ifdef COMPAT_FREEBSD32
455	}
456#endif
457
458	return (copyout(ptr, semidp, ptrs));
459}
460
461/* Other helper routines. */
462static int
463ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
464    unsigned int value, int flags, int compat32)
465{
466	struct pwddesc *pdp;
467	struct ksem *ks;
468	struct file *fp;
469	char *path;
470	const char *pr_path;
471	size_t pr_pathlen;
472	Fnv32_t fnv;
473	int error, fd;
474
475	AUDIT_ARG_FFLAGS(flags);
476	AUDIT_ARG_MODE(mode);
477	AUDIT_ARG_VALUE(value);
478
479	if (value > SEM_VALUE_MAX)
480		return (EINVAL);
481
482	pdp = td->td_proc->p_pd;
483	mode = (mode & ~pdp->pd_cmask) & ACCESSPERMS;
484	error = falloc(td, &fp, &fd, O_CLOEXEC);
485	if (error) {
486		if (name == NULL)
487			error = ENOSPC;
488		return (error);
489	}
490
491	/*
492	 * Go ahead and copyout the file descriptor now.  This is a bit
493	 * premature, but it is a lot easier to handle errors as opposed
494	 * to later when we've possibly created a new semaphore, etc.
495	 */
496	error = ksem_create_copyout_semid(td, semidp, fd, compat32);
497	if (error) {
498		fdclose(td, fp, fd);
499		fdrop(fp, td);
500		return (error);
501	}
502
503	if (name == NULL) {
504		/* Create an anonymous semaphore. */
505		ks = ksem_alloc(td->td_ucred, mode, value);
506		if (ks == NULL)
507			error = ENOSPC;
508		else
509			ks->ks_flags |= KS_ANONYMOUS;
510	} else {
511		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
512		pr_path = td->td_ucred->cr_prison->pr_path;
513
514		/* Construct a full pathname for jailed callers. */
515		pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
516		    : strlcpy(path, pr_path, MAXPATHLEN);
517		error = copyinstr(name, path + pr_pathlen,
518		    MAXPATHLEN - pr_pathlen, NULL);
519
520		/* Require paths to start with a '/' character. */
521		if (error == 0 && path[pr_pathlen] != '/')
522			error = EINVAL;
523		if (error) {
524			fdclose(td, fp, fd);
525			fdrop(fp, td);
526			free(path, M_KSEM);
527			return (error);
528		}
529
530		AUDIT_ARG_UPATH1_CANON(path);
531		fnv = fnv_32_str(path, FNV1_32_INIT);
532		sx_xlock(&ksem_dict_lock);
533		ks = ksem_lookup(path, fnv);
534		if (ks == NULL) {
535			/* Object does not exist, create it if requested. */
536			if (flags & O_CREAT) {
537				ks = ksem_alloc(td->td_ucred, mode, value);
538				if (ks == NULL)
539					error = ENFILE;
540				else {
541					ksem_insert(path, fnv, ks);
542					path = NULL;
543				}
544			} else
545				error = ENOENT;
546		} else {
547			/*
548			 * Object already exists, obtain a new
549			 * reference if requested and permitted.
550			 */
551			if ((flags & (O_CREAT | O_EXCL)) ==
552			    (O_CREAT | O_EXCL))
553				error = EEXIST;
554			else {
555#ifdef MAC
556				error = mac_posixsem_check_open(td->td_ucred,
557				    ks);
558				if (error == 0)
559#endif
560				error = ksem_access(ks, td->td_ucred);
561			}
562			if (error == 0)
563				ksem_hold(ks);
564#ifdef INVARIANTS
565			else
566				ks = NULL;
567#endif
568		}
569		sx_xunlock(&ksem_dict_lock);
570		if (path)
571			free(path, M_KSEM);
572	}
573
574	if (error) {
575		KASSERT(ks == NULL, ("ksem_create error with a ksem"));
576		fdclose(td, fp, fd);
577		fdrop(fp, td);
578		return (error);
579	}
580	KASSERT(ks != NULL, ("ksem_create w/o a ksem"));
581
582	finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);
583
584	fdrop(fp, td);
585
586	return (0);
587}
588
589static int
590ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
591    struct file **fpp)
592{
593	struct ksem *ks;
594	struct file *fp;
595	int error;
596
597	error = fget(td, id, rightsp, &fp);
598	if (error)
599		return (EINVAL);
600	if (fp->f_type != DTYPE_SEM) {
601		fdrop(fp, td);
602		return (EINVAL);
603	}
604	ks = fp->f_data;
605	if (ks->ks_flags & KS_DEAD) {
606		fdrop(fp, td);
607		return (EINVAL);
608	}
609	*fpp = fp;
610	return (0);
611}
612
613/* System calls. */
614#ifndef _SYS_SYSPROTO_H_
615struct ksem_init_args {
616	unsigned int	value;
617	semid_t		*idp;
618};
619#endif
620int
621sys_ksem_init(struct thread *td, struct ksem_init_args *uap)
622{
623
624	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
625	    0, 0));
626}
627
628#ifndef _SYS_SYSPROTO_H_
629struct ksem_open_args {
630	char		*name;
631	int		oflag;
632	mode_t		mode;
633	unsigned int	value;
634	semid_t		*idp;
635};
636#endif
637int
638sys_ksem_open(struct thread *td, struct ksem_open_args *uap)
639{
640
641	DP((">>> ksem_open start, pid=%d\n", (int)td->td_proc->p_pid));
642
643	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
644		return (EINVAL);
645	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
646	    uap->oflag, 0));
647}
648
649#ifndef _SYS_SYSPROTO_H_
650struct ksem_unlink_args {
651	char		*name;
652};
653#endif
654int
655sys_ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
656{
657	char *path;
658	const char *pr_path;
659	size_t pr_pathlen;
660	Fnv32_t fnv;
661	int error;
662
663	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
664	pr_path = td->td_ucred->cr_prison->pr_path;
665	pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
666	    : strlcpy(path, pr_path, MAXPATHLEN);
667	error = copyinstr(uap->name, path + pr_pathlen, MAXPATHLEN - pr_pathlen,
668	    NULL);
669	if (error) {
670		free(path, M_TEMP);
671		return (error);
672	}
673
674	AUDIT_ARG_UPATH1_CANON(path);
675	fnv = fnv_32_str(path, FNV1_32_INIT);
676	sx_xlock(&ksem_dict_lock);
677	error = ksem_remove(path, fnv, td->td_ucred);
678	sx_xunlock(&ksem_dict_lock);
679	free(path, M_TEMP);
680
681	return (error);
682}
683
684#ifndef _SYS_SYSPROTO_H_
685struct ksem_close_args {
686	semid_t		id;
687};
688#endif
689int
690sys_ksem_close(struct thread *td, struct ksem_close_args *uap)
691{
692	struct ksem *ks;
693	struct file *fp;
694	int error;
695
696	/* No capability rights required to close a semaphore. */
697	AUDIT_ARG_FD(uap->id);
698	error = ksem_get(td, uap->id, &cap_no_rights, &fp);
699	if (error)
700		return (error);
701	ks = fp->f_data;
702	if (ks->ks_flags & KS_ANONYMOUS) {
703		fdrop(fp, td);
704		return (EINVAL);
705	}
706	error = kern_close(td, uap->id);
707	fdrop(fp, td);
708	return (error);
709}
710
711#ifndef _SYS_SYSPROTO_H_
712struct ksem_post_args {
713	semid_t	id;
714};
715#endif
716int
717sys_ksem_post(struct thread *td, struct ksem_post_args *uap)
718{
719	cap_rights_t rights;
720	struct file *fp;
721	struct ksem *ks;
722	int error;
723
724	AUDIT_ARG_FD(uap->id);
725	error = ksem_get(td, uap->id,
726	    cap_rights_init_one(&rights, CAP_SEM_POST), &fp);
727	if (error)
728		return (error);
729	ks = fp->f_data;
730
731	mtx_lock(&sem_lock);
732#ifdef MAC
733	error = mac_posixsem_check_post(td->td_ucred, fp->f_cred, ks);
734	if (error)
735		goto err;
736#endif
737	if (ks->ks_value == SEM_VALUE_MAX) {
738		error = EOVERFLOW;
739		goto err;
740	}
741	++ks->ks_value;
742	if (ks->ks_waiters > 0)
743		cv_signal(&ks->ks_cv);
744	error = 0;
745	vfs_timestamp(&ks->ks_ctime);
746err:
747	mtx_unlock(&sem_lock);
748	fdrop(fp, td);
749	return (error);
750}
751
752#ifndef _SYS_SYSPROTO_H_
753struct ksem_wait_args {
754	semid_t		id;
755};
756#endif
757int
758sys_ksem_wait(struct thread *td, struct ksem_wait_args *uap)
759{
760
761	return (kern_sem_wait(td, uap->id, 0, NULL));
762}
763
764#ifndef _SYS_SYSPROTO_H_
765struct ksem_timedwait_args {
766	semid_t		id;
767	const struct timespec *abstime;
768};
769#endif
770int
771sys_ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
772{
773	struct timespec abstime;
774	struct timespec *ts;
775	int error;
776
777	/*
778	 * We allow a null timespec (wait forever).
779	 */
780	if (uap->abstime == NULL)
781		ts = NULL;
782	else {
783		error = copyin(uap->abstime, &abstime, sizeof(abstime));
784		if (error != 0)
785			return (error);
786		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
787			return (EINVAL);
788		ts = &abstime;
789	}
790	return (kern_sem_wait(td, uap->id, 0, ts));
791}
792
793#ifndef _SYS_SYSPROTO_H_
794struct ksem_trywait_args {
795	semid_t		id;
796};
797#endif
798int
799sys_ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
800{
801
802	return (kern_sem_wait(td, uap->id, 1, NULL));
803}
804
805static int
806kern_sem_wait(struct thread *td, semid_t id, int tryflag,
807    struct timespec *abstime)
808{
809	struct timespec ts1, ts2;
810	struct timeval tv;
811	cap_rights_t rights;
812	struct file *fp;
813	struct ksem *ks;
814	int error;
815
816	DP((">>> kern_sem_wait entered! pid=%d\n", (int)td->td_proc->p_pid));
817	AUDIT_ARG_FD(id);
818	error = ksem_get(td, id, cap_rights_init_one(&rights, CAP_SEM_WAIT),
819	    &fp);
820	if (error)
821		return (error);
822	ks = fp->f_data;
823	mtx_lock(&sem_lock);
824	DP((">>> kern_sem_wait critical section entered! pid=%d\n",
825	    (int)td->td_proc->p_pid));
826#ifdef MAC
827	error = mac_posixsem_check_wait(td->td_ucred, fp->f_cred, ks);
828	if (error) {
829		DP(("kern_sem_wait mac failed\n"));
830		goto err;
831	}
832#endif
833	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
834	vfs_timestamp(&ks->ks_atime);
835	while (ks->ks_value == 0) {
836		ks->ks_waiters++;
837		if (tryflag != 0)
838			error = EAGAIN;
839		else if (abstime == NULL)
840			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
841		else {
842			for (;;) {
843				ts1 = *abstime;
844				getnanotime(&ts2);
845				timespecsub(&ts1, &ts2, &ts1);
846				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
847				if (tv.tv_sec < 0) {
848					error = ETIMEDOUT;
849					break;
850				}
851				error = cv_timedwait_sig(&ks->ks_cv,
852				    &sem_lock, tvtohz(&tv));
853				if (error != EWOULDBLOCK)
854					break;
855			}
856		}
857		ks->ks_waiters--;
858		if (error)
859			goto err;
860	}
861	ks->ks_value--;
862	DP(("kern_sem_wait value post-decrement = %d\n", ks->ks_value));
863	error = 0;
864err:
865	mtx_unlock(&sem_lock);
866	fdrop(fp, td);
867	DP(("<<< kern_sem_wait leaving, pid=%d, error = %d\n",
868	    (int)td->td_proc->p_pid, error));
869	return (error);
870}
871
872#ifndef _SYS_SYSPROTO_H_
873struct ksem_getvalue_args {
874	semid_t		id;
875	int		*val;
876};
877#endif
878int
879sys_ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
880{
881	cap_rights_t rights;
882	struct file *fp;
883	struct ksem *ks;
884	int error, val;
885
886	AUDIT_ARG_FD(uap->id);
887	error = ksem_get(td, uap->id,
888	    cap_rights_init_one(&rights, CAP_SEM_GETVALUE), &fp);
889	if (error)
890		return (error);
891	ks = fp->f_data;
892
893	mtx_lock(&sem_lock);
894#ifdef MAC
895	error = mac_posixsem_check_getvalue(td->td_ucred, fp->f_cred, ks);
896	if (error) {
897		mtx_unlock(&sem_lock);
898		fdrop(fp, td);
899		return (error);
900	}
901#endif
902	val = ks->ks_value;
903	vfs_timestamp(&ks->ks_atime);
904	mtx_unlock(&sem_lock);
905	fdrop(fp, td);
906	error = copyout(&val, uap->val, sizeof(val));
907	return (error);
908}
909
910#ifndef _SYS_SYSPROTO_H_
911struct ksem_destroy_args {
912	semid_t		id;
913};
914#endif
915int
916sys_ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
917{
918	struct file *fp;
919	struct ksem *ks;
920	int error;
921
922	/* No capability rights required to close a semaphore. */
923	AUDIT_ARG_FD(uap->id);
924	error = ksem_get(td, uap->id, &cap_no_rights, &fp);
925	if (error)
926		return (error);
927	ks = fp->f_data;
928	if (!(ks->ks_flags & KS_ANONYMOUS)) {
929		fdrop(fp, td);
930		return (EINVAL);
931	}
932	mtx_lock(&sem_lock);
933	if (ks->ks_waiters != 0) {
934		mtx_unlock(&sem_lock);
935		error = EBUSY;
936		goto err;
937	}
938	ks->ks_flags |= KS_DEAD;
939	mtx_unlock(&sem_lock);
940
941	error = kern_close(td, uap->id);
942err:
943	fdrop(fp, td);
944	return (error);
945}
946
947static struct syscall_helper_data ksem_syscalls[] = {
948	SYSCALL_INIT_HELPER(ksem_init),
949	SYSCALL_INIT_HELPER(ksem_open),
950	SYSCALL_INIT_HELPER(ksem_unlink),
951	SYSCALL_INIT_HELPER(ksem_close),
952	SYSCALL_INIT_HELPER(ksem_post),
953	SYSCALL_INIT_HELPER(ksem_wait),
954	SYSCALL_INIT_HELPER(ksem_timedwait),
955	SYSCALL_INIT_HELPER(ksem_trywait),
956	SYSCALL_INIT_HELPER(ksem_getvalue),
957	SYSCALL_INIT_HELPER(ksem_destroy),
958	SYSCALL_INIT_LAST
959};
960
961#ifdef COMPAT_FREEBSD32
962#include <compat/freebsd32/freebsd32.h>
963#include <compat/freebsd32/freebsd32_proto.h>
964#include <compat/freebsd32/freebsd32_signal.h>
965#include <compat/freebsd32/freebsd32_syscall.h>
966#include <compat/freebsd32/freebsd32_util.h>
967
968int
969freebsd32_ksem_init(struct thread *td, struct freebsd32_ksem_init_args *uap)
970{
971
972	return (ksem_create(td, NULL, (semid_t *)uap->idp, S_IRWXU | S_IRWXG, uap->value,
973	    0, 1));
974}
975
976int
977freebsd32_ksem_open(struct thread *td, struct freebsd32_ksem_open_args *uap)
978{
979
980	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
981		return (EINVAL);
982	return (ksem_create(td, uap->name, (semid_t *)uap->idp, uap->mode, uap->value,
983	    uap->oflag, 1));
984}
985
986int
987freebsd32_ksem_timedwait(struct thread *td,
988    struct freebsd32_ksem_timedwait_args *uap)
989{
990	struct timespec32 abstime32;
991	struct timespec *ts, abstime;
992	int error;
993
994	/*
995	 * We allow a null timespec (wait forever).
996	 */
997	if (uap->abstime == NULL)
998		ts = NULL;
999	else {
1000		error = copyin(uap->abstime, &abstime32, sizeof(abstime32));
1001		if (error != 0)
1002			return (error);
1003		CP(abstime32, abstime, tv_sec);
1004		CP(abstime32, abstime, tv_nsec);
1005		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
1006			return (EINVAL);
1007		ts = &abstime;
1008	}
1009	return (kern_sem_wait(td, uap->id, 0, ts));
1010}
1011
1012static struct syscall_helper_data ksem32_syscalls[] = {
1013	SYSCALL32_INIT_HELPER(freebsd32_ksem_init),
1014	SYSCALL32_INIT_HELPER(freebsd32_ksem_open),
1015	SYSCALL32_INIT_HELPER_COMPAT(ksem_unlink),
1016	SYSCALL32_INIT_HELPER_COMPAT(ksem_close),
1017	SYSCALL32_INIT_HELPER_COMPAT(ksem_post),
1018	SYSCALL32_INIT_HELPER_COMPAT(ksem_wait),
1019	SYSCALL32_INIT_HELPER(freebsd32_ksem_timedwait),
1020	SYSCALL32_INIT_HELPER_COMPAT(ksem_trywait),
1021	SYSCALL32_INIT_HELPER_COMPAT(ksem_getvalue),
1022	SYSCALL32_INIT_HELPER_COMPAT(ksem_destroy),
1023	SYSCALL_INIT_LAST
1024};
1025#endif
1026
1027static int
1028ksem_module_init(void)
1029{
1030	int error;
1031
1032	mtx_init(&sem_lock, "sem", NULL, MTX_DEF);
1033	mtx_init(&ksem_count_lock, "ksem count", NULL, MTX_DEF);
1034	sx_init(&ksem_dict_lock, "ksem dictionary");
1035	ksem_dictionary = hashinit(1024, M_KSEM, &ksem_hash);
1036	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 200112L);
1037	p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
1038	p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
1039
1040	error = syscall_helper_register(ksem_syscalls, SY_THR_STATIC_KLD);
1041	if (error)
1042		return (error);
1043#ifdef COMPAT_FREEBSD32
1044	error = syscall32_helper_register(ksem32_syscalls, SY_THR_STATIC_KLD);
1045	if (error)
1046		return (error);
1047#endif
1048	return (0);
1049}
1050
1051static void
1052ksem_module_destroy(void)
1053{
1054
1055#ifdef COMPAT_FREEBSD32
1056	syscall32_helper_unregister(ksem32_syscalls);
1057#endif
1058	syscall_helper_unregister(ksem_syscalls);
1059
1060	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 0);
1061	hashdestroy(ksem_dictionary, M_KSEM, ksem_hash);
1062	sx_destroy(&ksem_dict_lock);
1063	mtx_destroy(&ksem_count_lock);
1064	mtx_destroy(&sem_lock);
1065	p31b_unsetcfg(CTL_P1003_1B_SEM_VALUE_MAX);
1066	p31b_unsetcfg(CTL_P1003_1B_SEM_NSEMS_MAX);
1067}
1068
1069static int
1070sem_modload(struct module *module, int cmd, void *arg)
1071{
1072        int error = 0;
1073
1074        switch (cmd) {
1075        case MOD_LOAD:
1076		error = ksem_module_init();
1077		if (error)
1078			ksem_module_destroy();
1079                break;
1080
1081        case MOD_UNLOAD:
1082		mtx_lock(&ksem_count_lock);
1083		if (nsems != 0) {
1084			error = EOPNOTSUPP;
1085			mtx_unlock(&ksem_count_lock);
1086			break;
1087		}
1088		ksem_dead = 1;
1089		mtx_unlock(&ksem_count_lock);
1090		ksem_module_destroy();
1091                break;
1092
1093        case MOD_SHUTDOWN:
1094                break;
1095        default:
1096                error = EINVAL;
1097                break;
1098        }
1099        return (error);
1100}
1101
1102static moduledata_t sem_mod = {
1103        "sem",
1104        &sem_modload,
1105        NULL
1106};
1107
1108DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
1109MODULE_VERSION(sem, 1);
1110