/* sysv_sem.c revision 298835 */
1/*- 2 * Implementation of SVID semaphores 3 * 4 * Author: Daniel Boulet 5 * 6 * This software is provided ``AS IS'' without any warranties of any kind. 7 */ 8/*- 9 * Copyright (c) 2003-2005 McAfee, Inc. 10 * All rights reserved. 11 * 12 * This software was developed for the FreeBSD Project in part by McAfee 13 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR 14 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research 15 * program. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 
37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: stable/10/sys/kern/sysv_sem.c 298835 2016-04-30 04:02:32Z jamie $"); 41 42#include "opt_compat.h" 43#include "opt_sysvipc.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/sysproto.h> 48#include <sys/eventhandler.h> 49#include <sys/kernel.h> 50#include <sys/proc.h> 51#include <sys/lock.h> 52#include <sys/module.h> 53#include <sys/mutex.h> 54#include <sys/racct.h> 55#include <sys/sem.h> 56#include <sys/sx.h> 57#include <sys/syscall.h> 58#include <sys/syscallsubr.h> 59#include <sys/sysent.h> 60#include <sys/sysctl.h> 61#include <sys/uio.h> 62#include <sys/malloc.h> 63#include <sys/jail.h> 64 65#include <security/mac/mac_framework.h> 66 67FEATURE(sysv_sem, "System V semaphores support"); 68 69static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); 70 71#ifdef SEM_DEBUG 72#define DPRINTF(a) printf a 73#else 74#define DPRINTF(a) 75#endif 76 77static int seminit(void); 78static int sysvsem_modload(struct module *, int, void *); 79static int semunload(void); 80static void semexit_myhook(void *arg, struct proc *p); 81static int sysctl_sema(SYSCTL_HANDLER_ARGS); 82static int semvalid(int semid, struct prison *rpr, 83 struct semid_kernel *semakptr); 84static void sem_remove(int semidx, struct ucred *cred); 85static struct prison *sem_find_prison(struct ucred *); 86static int sem_prison_cansee(struct prison *, struct semid_kernel *); 87static int sem_prison_check(void *, void *); 88static int sem_prison_set(void *, void *); 89static int sem_prison_get(void *, void *); 90static int sem_prison_remove(void *, void *); 91static void sem_prison_cleanup(struct prison *); 92 93#ifndef _SYS_SYSPROTO_H_ 94struct __semctl_args; 95int __semctl(struct thread *td, struct __semctl_args *uap); 96struct semget_args; 97int semget(struct thread *td, struct semget_args *uap); 98struct semop_args; 99int semop(struct thread *td, struct semop_args *uap); 100#endif 101 102static struct sem_undo 
*semu_alloc(struct thread *td); 103static int semundo_adjust(struct thread *td, struct sem_undo **supptr, 104 int semid, int semseq, int semnum, int adjval); 105static void semundo_clear(int semid, int semnum); 106 107static struct mtx sem_mtx; /* semaphore global lock */ 108static struct mtx sem_undo_mtx; 109static int semtot = 0; 110static struct semid_kernel *sema; /* semaphore id pool */ 111static struct mtx *sema_mtx; /* semaphore id pool mutexes*/ 112static struct sem *sem; /* semaphore pool */ 113LIST_HEAD(, sem_undo) semu_list; /* list of active undo structures */ 114LIST_HEAD(, sem_undo) semu_free_list; /* list of free undo structures */ 115static int *semu; /* undo structure pool */ 116static eventhandler_tag semexit_tag; 117static unsigned sem_prison_slot; /* prison OSD slot */ 118 119#define SEMUNDO_MTX sem_undo_mtx 120#define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX); 121#define SEMUNDO_UNLOCK() mtx_unlock(&SEMUNDO_MTX); 122#define SEMUNDO_LOCKASSERT(how) mtx_assert(&SEMUNDO_MTX, (how)); 123 124struct sem { 125 u_short semval; /* semaphore value */ 126 pid_t sempid; /* pid of last operation */ 127 u_short semncnt; /* # awaiting semval > cval */ 128 u_short semzcnt; /* # awaiting semval = 0 */ 129}; 130 131/* 132 * Undo structure (one per process) 133 */ 134struct sem_undo { 135 LIST_ENTRY(sem_undo) un_next; /* ptr to next active undo structure */ 136 struct proc *un_proc; /* owner of this structure */ 137 short un_cnt; /* # of active entries */ 138 struct undo { 139 short un_adjval; /* adjust on exit values */ 140 short un_num; /* semaphore # */ 141 int un_id; /* semid */ 142 unsigned short un_seq; 143 } un_ent[1]; /* undo entries */ 144}; 145 146/* 147 * Configuration parameters 148 */ 149#ifndef SEMMNI 150#define SEMMNI 50 /* # of semaphore identifiers */ 151#endif 152#ifndef SEMMNS 153#define SEMMNS 340 /* # of semaphores in system */ 154#endif 155#ifndef SEMUME 156#define SEMUME 50 /* max # of undo entries per process */ 157#endif 158#ifndef SEMMNU 
159#define SEMMNU 150 /* # of undo structures in system */ 160#endif 161 162/* shouldn't need tuning */ 163#ifndef SEMMSL 164#define SEMMSL SEMMNS /* max # of semaphores per id */ 165#endif 166#ifndef SEMOPM 167#define SEMOPM 100 /* max # of operations per semop call */ 168#endif 169 170#define SEMVMX 32767 /* semaphore maximum value */ 171#define SEMAEM 16384 /* adjust on exit max value */ 172 173/* 174 * Due to the way semaphore memory is allocated, we have to ensure that 175 * SEMUSZ is properly aligned. 176 */ 177 178#define SEM_ALIGN(bytes) (((bytes) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) 179 180/* actual size of an undo structure */ 181#define SEMUSZ SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME])) 182 183/* 184 * Macro to find a particular sem_undo vector 185 */ 186#define SEMU(ix) \ 187 ((struct sem_undo *)(((intptr_t)semu)+ix * seminfo.semusz)) 188 189/* 190 * semaphore info struct 191 */ 192struct seminfo seminfo = { 193 SEMMNI, /* # of semaphore identifiers */ 194 SEMMNS, /* # of semaphores in system */ 195 SEMMNU, /* # of undo structures in system */ 196 SEMMSL, /* max # of semaphores per id */ 197 SEMOPM, /* max # of operations per semop call */ 198 SEMUME, /* max # of undo entries per process */ 199 SEMUSZ, /* size in bytes of undo structure */ 200 SEMVMX, /* semaphore maximum value */ 201 SEMAEM /* adjust on exit max value */ 202}; 203 204SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RDTUN, &seminfo.semmni, 0, 205 "Number of semaphore identifiers"); 206SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RDTUN, &seminfo.semmns, 0, 207 "Maximum number of semaphores in the system"); 208SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RDTUN, &seminfo.semmnu, 0, 209 "Maximum number of undo structures in the system"); 210SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RW, &seminfo.semmsl, 0, 211 "Max semaphores per id"); 212SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RDTUN, &seminfo.semopm, 0, 213 "Max operations per semop call"); 
214SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RDTUN, &seminfo.semume, 0, 215 "Max undo entries per process"); 216SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RDTUN, &seminfo.semusz, 0, 217 "Size in bytes of undo structure"); 218SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0, 219 "Semaphore maximum value"); 220SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0, 221 "Adjust on exit max value"); 222SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, 223 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 224 NULL, 0, sysctl_sema, "", "Semaphore id pool"); 225 226static struct syscall_helper_data sem_syscalls[] = { 227 SYSCALL_INIT_HELPER(__semctl), 228 SYSCALL_INIT_HELPER(semget), 229 SYSCALL_INIT_HELPER(semop), 230#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 231 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 232 SYSCALL_INIT_HELPER(semsys), 233 SYSCALL_INIT_HELPER_COMPAT(freebsd7___semctl), 234#endif 235 SYSCALL_INIT_LAST 236}; 237 238#ifdef COMPAT_FREEBSD32 239#include <compat/freebsd32/freebsd32.h> 240#include <compat/freebsd32/freebsd32_ipc.h> 241#include <compat/freebsd32/freebsd32_proto.h> 242#include <compat/freebsd32/freebsd32_signal.h> 243#include <compat/freebsd32/freebsd32_syscall.h> 244#include <compat/freebsd32/freebsd32_util.h> 245 246static struct syscall_helper_data sem32_syscalls[] = { 247 SYSCALL32_INIT_HELPER(freebsd32_semctl), 248 SYSCALL32_INIT_HELPER_COMPAT(semget), 249 SYSCALL32_INIT_HELPER_COMPAT(semop), 250 SYSCALL32_INIT_HELPER(freebsd32_semsys), 251#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 252 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 253 SYSCALL32_INIT_HELPER(freebsd7_freebsd32_semctl), 254#endif 255 SYSCALL_INIT_LAST 256}; 257#endif 258 259static int 260seminit(void) 261{ 262 struct prison *pr; 263 void *rsv; 264 int i, error; 265 osd_method_t methods[PR_MAXMETHOD] = { 266 [PR_METHOD_CHECK] = sem_prison_check, 267 [PR_METHOD_SET] = sem_prison_set, 268 
[PR_METHOD_GET] = sem_prison_get, 269 [PR_METHOD_REMOVE] = sem_prison_remove, 270 }; 271 272 TUNABLE_INT_FETCH("kern.ipc.semmni", &seminfo.semmni); 273 TUNABLE_INT_FETCH("kern.ipc.semmns", &seminfo.semmns); 274 TUNABLE_INT_FETCH("kern.ipc.semmnu", &seminfo.semmnu); 275 TUNABLE_INT_FETCH("kern.ipc.semmsl", &seminfo.semmsl); 276 TUNABLE_INT_FETCH("kern.ipc.semopm", &seminfo.semopm); 277 TUNABLE_INT_FETCH("kern.ipc.semume", &seminfo.semume); 278 TUNABLE_INT_FETCH("kern.ipc.semusz", &seminfo.semusz); 279 TUNABLE_INT_FETCH("kern.ipc.semvmx", &seminfo.semvmx); 280 TUNABLE_INT_FETCH("kern.ipc.semaem", &seminfo.semaem); 281 282 sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK); 283 sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM, 284 M_WAITOK); 285 sema_mtx = malloc(sizeof(struct mtx) * seminfo.semmni, M_SEM, 286 M_WAITOK | M_ZERO); 287 semu = malloc(seminfo.semmnu * seminfo.semusz, M_SEM, M_WAITOK); 288 289 for (i = 0; i < seminfo.semmni; i++) { 290 sema[i].u.sem_base = 0; 291 sema[i].u.sem_perm.mode = 0; 292 sema[i].u.sem_perm.seq = 0; 293#ifdef MAC 294 mac_sysvsem_init(&sema[i]); 295#endif 296 } 297 for (i = 0; i < seminfo.semmni; i++) 298 mtx_init(&sema_mtx[i], "semid", NULL, MTX_DEF); 299 LIST_INIT(&semu_free_list); 300 for (i = 0; i < seminfo.semmnu; i++) { 301 struct sem_undo *suptr = SEMU(i); 302 suptr->un_proc = NULL; 303 LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); 304 } 305 LIST_INIT(&semu_list); 306 mtx_init(&sem_mtx, "sem", NULL, MTX_DEF); 307 mtx_init(&sem_undo_mtx, "semu", NULL, MTX_DEF); 308 semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL, 309 EVENTHANDLER_PRI_ANY); 310 311 /* Set current prisons according to their allow.sysvipc. 
*/ 312 sem_prison_slot = osd_jail_register(NULL, methods); 313 rsv = osd_reserve(sem_prison_slot); 314 prison_lock(&prison0); 315 (void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0); 316 prison_unlock(&prison0); 317 rsv = NULL; 318 sx_slock(&allprison_lock); 319 TAILQ_FOREACH(pr, &allprison, pr_list) { 320 if (rsv == NULL) 321 rsv = osd_reserve(sem_prison_slot); 322 prison_lock(pr); 323 if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { 324 (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv, 325 &prison0); 326 rsv = NULL; 327 } 328 prison_unlock(pr); 329 } 330 if (rsv != NULL) 331 osd_free_reserved(rsv); 332 sx_sunlock(&allprison_lock); 333 334 error = syscall_helper_register(sem_syscalls); 335 if (error != 0) 336 return (error); 337#ifdef COMPAT_FREEBSD32 338 error = syscall32_helper_register(sem32_syscalls); 339 if (error != 0) 340 return (error); 341#endif 342 return (0); 343} 344 345static int 346semunload(void) 347{ 348 int i; 349 350 /* XXXKIB */ 351 if (semtot != 0) 352 return (EBUSY); 353 354#ifdef COMPAT_FREEBSD32 355 syscall32_helper_unregister(sem32_syscalls); 356#endif 357 syscall_helper_unregister(sem_syscalls); 358 EVENTHANDLER_DEREGISTER(process_exit, semexit_tag); 359 if (sem_prison_slot != 0) 360 osd_jail_deregister(sem_prison_slot); 361#ifdef MAC 362 for (i = 0; i < seminfo.semmni; i++) 363 mac_sysvsem_destroy(&sema[i]); 364#endif 365 free(sem, M_SEM); 366 free(sema, M_SEM); 367 free(semu, M_SEM); 368 for (i = 0; i < seminfo.semmni; i++) 369 mtx_destroy(&sema_mtx[i]); 370 free(sema_mtx, M_SEM); 371 mtx_destroy(&sem_mtx); 372 mtx_destroy(&sem_undo_mtx); 373 return (0); 374} 375 376static int 377sysvsem_modload(struct module *module, int cmd, void *arg) 378{ 379 int error = 0; 380 381 switch (cmd) { 382 case MOD_LOAD: 383 error = seminit(); 384 if (error != 0) 385 semunload(); 386 break; 387 case MOD_UNLOAD: 388 error = semunload(); 389 break; 390 case MOD_SHUTDOWN: 391 break; 392 default: 393 error = EINVAL; 394 break; 
395 } 396 return (error); 397} 398 399static moduledata_t sysvsem_mod = { 400 "sysvsem", 401 &sysvsem_modload, 402 NULL 403}; 404 405DECLARE_MODULE(sysvsem, sysvsem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST); 406MODULE_VERSION(sysvsem, 1); 407 408/* 409 * Allocate a new sem_undo structure for a process 410 * (returns ptr to structure or NULL if no more room) 411 */ 412 413static struct sem_undo * 414semu_alloc(struct thread *td) 415{ 416 struct sem_undo *suptr; 417 418 SEMUNDO_LOCKASSERT(MA_OWNED); 419 if ((suptr = LIST_FIRST(&semu_free_list)) == NULL) 420 return (NULL); 421 LIST_REMOVE(suptr, un_next); 422 LIST_INSERT_HEAD(&semu_list, suptr, un_next); 423 suptr->un_cnt = 0; 424 suptr->un_proc = td->td_proc; 425 return (suptr); 426} 427 428static int 429semu_try_free(struct sem_undo *suptr) 430{ 431 432 SEMUNDO_LOCKASSERT(MA_OWNED); 433 434 if (suptr->un_cnt != 0) 435 return (0); 436 LIST_REMOVE(suptr, un_next); 437 LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); 438 return (1); 439} 440 441/* 442 * Adjust a particular entry for a particular proc 443 */ 444 445static int 446semundo_adjust(struct thread *td, struct sem_undo **supptr, int semid, 447 int semseq, int semnum, int adjval) 448{ 449 struct proc *p = td->td_proc; 450 struct sem_undo *suptr; 451 struct undo *sunptr; 452 int i; 453 454 SEMUNDO_LOCKASSERT(MA_OWNED); 455 /* Look for and remember the sem_undo if the caller doesn't provide 456 it */ 457 458 suptr = *supptr; 459 if (suptr == NULL) { 460 LIST_FOREACH(suptr, &semu_list, un_next) { 461 if (suptr->un_proc == p) { 462 *supptr = suptr; 463 break; 464 } 465 } 466 if (suptr == NULL) { 467 if (adjval == 0) 468 return(0); 469 suptr = semu_alloc(td); 470 if (suptr == NULL) 471 return (ENOSPC); 472 *supptr = suptr; 473 } 474 } 475 476 /* 477 * Look for the requested entry and adjust it (delete if adjval becomes 478 * 0). 
479 */ 480 sunptr = &suptr->un_ent[0]; 481 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 482 if (sunptr->un_id != semid || sunptr->un_num != semnum) 483 continue; 484 if (adjval != 0) { 485 adjval += sunptr->un_adjval; 486 if (adjval > seminfo.semaem || adjval < -seminfo.semaem) 487 return (ERANGE); 488 } 489 sunptr->un_adjval = adjval; 490 if (sunptr->un_adjval == 0) { 491 suptr->un_cnt--; 492 if (i < suptr->un_cnt) 493 suptr->un_ent[i] = 494 suptr->un_ent[suptr->un_cnt]; 495 if (suptr->un_cnt == 0) 496 semu_try_free(suptr); 497 } 498 return (0); 499 } 500 501 /* Didn't find the right entry - create it */ 502 if (adjval == 0) 503 return (0); 504 if (adjval > seminfo.semaem || adjval < -seminfo.semaem) 505 return (ERANGE); 506 if (suptr->un_cnt != seminfo.semume) { 507 sunptr = &suptr->un_ent[suptr->un_cnt]; 508 suptr->un_cnt++; 509 sunptr->un_adjval = adjval; 510 sunptr->un_id = semid; 511 sunptr->un_num = semnum; 512 sunptr->un_seq = semseq; 513 } else 514 return (EINVAL); 515 return (0); 516} 517 518static void 519semundo_clear(int semid, int semnum) 520{ 521 struct sem_undo *suptr, *suptr1; 522 struct undo *sunptr; 523 int i; 524 525 SEMUNDO_LOCKASSERT(MA_OWNED); 526 LIST_FOREACH_SAFE(suptr, &semu_list, un_next, suptr1) { 527 sunptr = &suptr->un_ent[0]; 528 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 529 if (sunptr->un_id != semid) 530 continue; 531 if (semnum == -1 || sunptr->un_num == semnum) { 532 suptr->un_cnt--; 533 if (i < suptr->un_cnt) { 534 suptr->un_ent[i] = 535 suptr->un_ent[suptr->un_cnt]; 536 continue; 537 } 538 semu_try_free(suptr); 539 } 540 if (semnum != -1) 541 break; 542 } 543 } 544} 545 546static int 547semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr) 548{ 549 550 return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || 551 semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) || 552 sem_prison_cansee(rpr, semakptr) ? 
EINVAL : 0); 553} 554 555static void 556sem_remove(int semidx, struct ucred *cred) 557{ 558 struct semid_kernel *semakptr; 559 int i; 560 561 KASSERT(semidx >= 0 && semidx < seminfo.semmni, 562 ("semidx out of bounds")); 563 semakptr = &sema[semidx]; 564 semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0; 565 semakptr->u.sem_perm.uid = cred ? cred->cr_uid : 0; 566 semakptr->u.sem_perm.mode = 0; 567 racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems); 568 crfree(semakptr->cred); 569 semakptr->cred = NULL; 570 SEMUNDO_LOCK(); 571 semundo_clear(semidx, -1); 572 SEMUNDO_UNLOCK(); 573#ifdef MAC 574 mac_sysvsem_cleanup(semakptr); 575#endif 576 wakeup(semakptr); 577 for (i = 0; i < seminfo.semmni; i++) { 578 if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && 579 sema[i].u.sem_base > semakptr->u.sem_base) 580 mtx_lock_flags(&sema_mtx[i], LOP_DUPOK); 581 } 582 for (i = semakptr->u.sem_base - sem; i < semtot; i++) 583 sem[i] = sem[i + semakptr->u.sem_nsems]; 584 for (i = 0; i < seminfo.semmni; i++) { 585 if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && 586 sema[i].u.sem_base > semakptr->u.sem_base) { 587 sema[i].u.sem_base -= semakptr->u.sem_nsems; 588 mtx_unlock(&sema_mtx[i]); 589 } 590 } 591 semtot -= semakptr->u.sem_nsems; 592} 593 594static struct prison * 595sem_find_prison(struct ucred *cred) 596{ 597 struct prison *pr, *rpr; 598 599 pr = cred->cr_prison; 600 prison_lock(pr); 601 rpr = osd_jail_get(pr, sem_prison_slot); 602 prison_unlock(pr); 603 return rpr; 604} 605 606static int 607sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr) 608{ 609 610 if (semakptr->cred == NULL || 611 !(rpr == semakptr->cred->cr_prison || 612 prison_ischild(rpr, semakptr->cred->cr_prison))) 613 return (EINVAL); 614 return (0); 615} 616 617/* 618 * Note that the user-mode half of this passes a union, not a pointer. 
619 */ 620#ifndef _SYS_SYSPROTO_H_ 621struct __semctl_args { 622 int semid; 623 int semnum; 624 int cmd; 625 union semun *arg; 626}; 627#endif 628int 629sys___semctl(struct thread *td, struct __semctl_args *uap) 630{ 631 struct semid_ds dsbuf; 632 union semun arg, semun; 633 register_t rval; 634 int error; 635 636 switch (uap->cmd) { 637 case SEM_STAT: 638 case IPC_SET: 639 case IPC_STAT: 640 case GETALL: 641 case SETVAL: 642 case SETALL: 643 error = copyin(uap->arg, &arg, sizeof(arg)); 644 if (error) 645 return (error); 646 break; 647 } 648 649 switch (uap->cmd) { 650 case SEM_STAT: 651 case IPC_STAT: 652 semun.buf = &dsbuf; 653 break; 654 case IPC_SET: 655 error = copyin(arg.buf, &dsbuf, sizeof(dsbuf)); 656 if (error) 657 return (error); 658 semun.buf = &dsbuf; 659 break; 660 case GETALL: 661 case SETALL: 662 semun.array = arg.array; 663 break; 664 case SETVAL: 665 semun.val = arg.val; 666 break; 667 } 668 669 error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, 670 &rval); 671 if (error) 672 return (error); 673 674 switch (uap->cmd) { 675 case SEM_STAT: 676 case IPC_STAT: 677 error = copyout(&dsbuf, arg.buf, sizeof(dsbuf)); 678 break; 679 } 680 681 if (error == 0) 682 td->td_retval[0] = rval; 683 return (error); 684} 685 686int 687kern_semctl(struct thread *td, int semid, int semnum, int cmd, 688 union semun *arg, register_t *rval) 689{ 690 u_short *array; 691 struct ucred *cred = td->td_ucred; 692 int i, error; 693 struct prison *rpr; 694 struct semid_ds *sbuf; 695 struct semid_kernel *semakptr; 696 struct mtx *sema_mtxp; 697 u_short usval, count; 698 int semidx; 699 700 DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n", 701 semid, semnum, cmd, arg)); 702 703 rpr = sem_find_prison(td->td_ucred); 704 if (sem == NULL) 705 return (ENOSYS); 706 707 array = NULL; 708 709 switch(cmd) { 710 case SEM_STAT: 711 /* 712 * For this command we assume semid is an array index 713 * rather than an IPC id. 
714 */ 715 if (semid < 0 || semid >= seminfo.semmni) 716 return (EINVAL); 717 semakptr = &sema[semid]; 718 sema_mtxp = &sema_mtx[semid]; 719 mtx_lock(sema_mtxp); 720 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) { 721 error = EINVAL; 722 goto done2; 723 } 724 if ((error = sem_prison_cansee(rpr, semakptr))) 725 goto done2; 726 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 727 goto done2; 728#ifdef MAC 729 error = mac_sysvsem_check_semctl(cred, semakptr, cmd); 730 if (error != 0) 731 goto done2; 732#endif 733 bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); 734 if (cred->cr_prison != semakptr->cred->cr_prison) 735 arg->buf->sem_perm.key = IPC_PRIVATE; 736 *rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm); 737 mtx_unlock(sema_mtxp); 738 return (0); 739 } 740 741 semidx = IPCID_TO_IX(semid); 742 if (semidx < 0 || semidx >= seminfo.semmni) 743 return (EINVAL); 744 745 semakptr = &sema[semidx]; 746 sema_mtxp = &sema_mtx[semidx]; 747 if (cmd == IPC_RMID) 748 mtx_lock(&sem_mtx); 749 mtx_lock(sema_mtxp); 750 751#ifdef MAC 752 error = mac_sysvsem_check_semctl(cred, semakptr, cmd); 753 if (error != 0) 754 goto done2; 755#endif 756 757 error = 0; 758 *rval = 0; 759 760 switch (cmd) { 761 case IPC_RMID: 762 if ((error = semvalid(semid, rpr, semakptr)) != 0) 763 goto done2; 764 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) 765 goto done2; 766 sem_remove(semidx, cred); 767 break; 768 769 case IPC_SET: 770 if ((error = semvalid(semid, rpr, semakptr)) != 0) 771 goto done2; 772 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) 773 goto done2; 774 sbuf = arg->buf; 775 semakptr->u.sem_perm.uid = sbuf->sem_perm.uid; 776 semakptr->u.sem_perm.gid = sbuf->sem_perm.gid; 777 semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode & 778 ~0777) | (sbuf->sem_perm.mode & 0777); 779 semakptr->u.sem_ctime = time_second; 780 break; 781 782 case IPC_STAT: 783 if ((error = semvalid(semid, rpr, semakptr)) != 0) 784 goto done2; 785 if ((error = ipcperm(td, 
&semakptr->u.sem_perm, IPC_R))) 786 goto done2; 787 bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); 788 if (cred->cr_prison != semakptr->cred->cr_prison) 789 arg->buf->sem_perm.key = IPC_PRIVATE; 790 break; 791 792 case GETNCNT: 793 if ((error = semvalid(semid, rpr, semakptr)) != 0) 794 goto done2; 795 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 796 goto done2; 797 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 798 error = EINVAL; 799 goto done2; 800 } 801 *rval = semakptr->u.sem_base[semnum].semncnt; 802 break; 803 804 case GETPID: 805 if ((error = semvalid(semid, rpr, semakptr)) != 0) 806 goto done2; 807 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 808 goto done2; 809 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 810 error = EINVAL; 811 goto done2; 812 } 813 *rval = semakptr->u.sem_base[semnum].sempid; 814 break; 815 816 case GETVAL: 817 if ((error = semvalid(semid, rpr, semakptr)) != 0) 818 goto done2; 819 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 820 goto done2; 821 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 822 error = EINVAL; 823 goto done2; 824 } 825 *rval = semakptr->u.sem_base[semnum].semval; 826 break; 827 828 case GETALL: 829 /* 830 * Unfortunately, callers of this function don't know 831 * in advance how many semaphores are in this set. 832 * While we could just allocate the maximum size array 833 * and pass the actual size back to the caller, that 834 * won't work for SETALL since we can't copyin() more 835 * data than the user specified as we may return a 836 * spurious EFAULT. 837 * 838 * Note that the number of semaphores in a set is 839 * fixed for the life of that set. The only way that 840 * the 'count' could change while are blocked in 841 * malloc() is if this semaphore set were destroyed 842 * and a new one created with the same index. 
843 * However, semvalid() will catch that due to the 844 * sequence number unless exactly 0x8000 (or a 845 * multiple thereof) semaphore sets for the same index 846 * are created and destroyed while we are in malloc! 847 * 848 */ 849 count = semakptr->u.sem_nsems; 850 mtx_unlock(sema_mtxp); 851 array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); 852 mtx_lock(sema_mtxp); 853 if ((error = semvalid(semid, rpr, semakptr)) != 0) 854 goto done2; 855 KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); 856 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 857 goto done2; 858 for (i = 0; i < semakptr->u.sem_nsems; i++) 859 array[i] = semakptr->u.sem_base[i].semval; 860 mtx_unlock(sema_mtxp); 861 error = copyout(array, arg->array, count * sizeof(*array)); 862 mtx_lock(sema_mtxp); 863 break; 864 865 case GETZCNT: 866 if ((error = semvalid(semid, rpr, semakptr)) != 0) 867 goto done2; 868 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 869 goto done2; 870 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 871 error = EINVAL; 872 goto done2; 873 } 874 *rval = semakptr->u.sem_base[semnum].semzcnt; 875 break; 876 877 case SETVAL: 878 if ((error = semvalid(semid, rpr, semakptr)) != 0) 879 goto done2; 880 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) 881 goto done2; 882 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 883 error = EINVAL; 884 goto done2; 885 } 886 if (arg->val < 0 || arg->val > seminfo.semvmx) { 887 error = ERANGE; 888 goto done2; 889 } 890 semakptr->u.sem_base[semnum].semval = arg->val; 891 SEMUNDO_LOCK(); 892 semundo_clear(semidx, semnum); 893 SEMUNDO_UNLOCK(); 894 wakeup(semakptr); 895 break; 896 897 case SETALL: 898 /* 899 * See comment on GETALL for why 'count' shouldn't change 900 * and why we require a userland buffer. 
901 */ 902 count = semakptr->u.sem_nsems; 903 mtx_unlock(sema_mtxp); 904 array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); 905 error = copyin(arg->array, array, count * sizeof(*array)); 906 mtx_lock(sema_mtxp); 907 if (error) 908 break; 909 if ((error = semvalid(semid, rpr, semakptr)) != 0) 910 goto done2; 911 KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); 912 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) 913 goto done2; 914 for (i = 0; i < semakptr->u.sem_nsems; i++) { 915 usval = array[i]; 916 if (usval > seminfo.semvmx) { 917 error = ERANGE; 918 break; 919 } 920 semakptr->u.sem_base[i].semval = usval; 921 } 922 SEMUNDO_LOCK(); 923 semundo_clear(semidx, -1); 924 SEMUNDO_UNLOCK(); 925 wakeup(semakptr); 926 break; 927 928 default: 929 error = EINVAL; 930 break; 931 } 932 933done2: 934 mtx_unlock(sema_mtxp); 935 if (cmd == IPC_RMID) 936 mtx_unlock(&sem_mtx); 937 if (array != NULL) 938 free(array, M_TEMP); 939 return(error); 940} 941 942#ifndef _SYS_SYSPROTO_H_ 943struct semget_args { 944 key_t key; 945 int nsems; 946 int semflg; 947}; 948#endif 949int 950sys_semget(struct thread *td, struct semget_args *uap) 951{ 952 int semid, error = 0; 953 int key = uap->key; 954 int nsems = uap->nsems; 955 int semflg = uap->semflg; 956 struct ucred *cred = td->td_ucred; 957 958 DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); 959 960 if (sem_find_prison(cred) == NULL) 961 return (ENOSYS); 962 963 mtx_lock(&sem_mtx); 964 if (key != IPC_PRIVATE) { 965 for (semid = 0; semid < seminfo.semmni; semid++) { 966 if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) && 967 sema[semid].cred != NULL && 968 sema[semid].cred->cr_prison == cred->cr_prison && 969 sema[semid].u.sem_perm.key == key) 970 break; 971 } 972 if (semid < seminfo.semmni) { 973 DPRINTF(("found public key\n")); 974 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 975 DPRINTF(("not exclusive\n")); 976 error = EEXIST; 977 goto done2; 978 } 979 if ((error = ipcperm(td, 
&sema[semid].u.sem_perm, 980 semflg & 0700))) { 981 goto done2; 982 } 983 if (nsems > 0 && sema[semid].u.sem_nsems < nsems) { 984 DPRINTF(("too small\n")); 985 error = EINVAL; 986 goto done2; 987 } 988#ifdef MAC 989 error = mac_sysvsem_check_semget(cred, &sema[semid]); 990 if (error != 0) 991 goto done2; 992#endif 993 goto found; 994 } 995 } 996 997 DPRINTF(("need to allocate the semid_kernel\n")); 998 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 999 if (nsems <= 0 || nsems > seminfo.semmsl) { 1000 DPRINTF(("nsems out of range (0<%d<=%d)\n", nsems, 1001 seminfo.semmsl)); 1002 error = EINVAL; 1003 goto done2; 1004 } 1005 if (nsems > seminfo.semmns - semtot) { 1006 DPRINTF(( 1007 "not enough semaphores left (need %d, got %d)\n", 1008 nsems, seminfo.semmns - semtot)); 1009 error = ENOSPC; 1010 goto done2; 1011 } 1012 for (semid = 0; semid < seminfo.semmni; semid++) { 1013 if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0) 1014 break; 1015 } 1016 if (semid == seminfo.semmni) { 1017 DPRINTF(("no more semid_kernel's available\n")); 1018 error = ENOSPC; 1019 goto done2; 1020 } 1021#ifdef RACCT 1022 if (racct_enable) { 1023 PROC_LOCK(td->td_proc); 1024 error = racct_add(td->td_proc, RACCT_NSEM, nsems); 1025 PROC_UNLOCK(td->td_proc); 1026 if (error != 0) { 1027 error = ENOSPC; 1028 goto done2; 1029 } 1030 } 1031#endif 1032 DPRINTF(("semid %d is available\n", semid)); 1033 mtx_lock(&sema_mtx[semid]); 1034 KASSERT((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0, 1035 ("Lost semaphore %d", semid)); 1036 sema[semid].u.sem_perm.key = key; 1037 sema[semid].u.sem_perm.cuid = cred->cr_uid; 1038 sema[semid].u.sem_perm.uid = cred->cr_uid; 1039 sema[semid].u.sem_perm.cgid = cred->cr_gid; 1040 sema[semid].u.sem_perm.gid = cred->cr_gid; 1041 sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 1042 sema[semid].cred = crhold(cred); 1043 sema[semid].u.sem_perm.seq = 1044 (sema[semid].u.sem_perm.seq + 1) & 0x7fff; 1045 sema[semid].u.sem_nsems = nsems; 1046 
sema[semid].u.sem_otime = 0; 1047 sema[semid].u.sem_ctime = time_second; 1048 sema[semid].u.sem_base = &sem[semtot]; 1049 semtot += nsems; 1050 bzero(sema[semid].u.sem_base, 1051 sizeof(sema[semid].u.sem_base[0])*nsems); 1052#ifdef MAC 1053 mac_sysvsem_create(cred, &sema[semid]); 1054#endif 1055 mtx_unlock(&sema_mtx[semid]); 1056 DPRINTF(("sembase = %p, next = %p\n", 1057 sema[semid].u.sem_base, &sem[semtot])); 1058 } else { 1059 DPRINTF(("didn't find it and wasn't asked to create it\n")); 1060 error = ENOENT; 1061 goto done2; 1062 } 1063 1064found: 1065 td->td_retval[0] = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm); 1066done2: 1067 mtx_unlock(&sem_mtx); 1068 return (error); 1069} 1070 1071#ifndef _SYS_SYSPROTO_H_ 1072struct semop_args { 1073 int semid; 1074 struct sembuf *sops; 1075 size_t nsops; 1076}; 1077#endif 1078int 1079sys_semop(struct thread *td, struct semop_args *uap) 1080{ 1081#define SMALL_SOPS 8 1082 struct sembuf small_sops[SMALL_SOPS]; 1083 int semid = uap->semid; 1084 size_t nsops = uap->nsops; 1085 struct prison *rpr; 1086 struct sembuf *sops; 1087 struct semid_kernel *semakptr; 1088 struct sembuf *sopptr = 0; 1089 struct sem *semptr = 0; 1090 struct sem_undo *suptr; 1091 struct mtx *sema_mtxp; 1092 size_t i, j, k; 1093 int error; 1094 int do_wakeup, do_undos; 1095 unsigned short seq; 1096 1097#ifdef SEM_DEBUG 1098 sops = NULL; 1099#endif 1100 DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops)); 1101 1102 rpr = sem_find_prison(td->td_ucred); 1103 if (sem == NULL) 1104 return (ENOSYS); 1105 1106 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 1107 1108 if (semid < 0 || semid >= seminfo.semmni) 1109 return (EINVAL); 1110 1111 /* Allocate memory for sem_ops */ 1112 if (nsops <= SMALL_SOPS) 1113 sops = small_sops; 1114 else if (nsops > seminfo.semopm) { 1115 DPRINTF(("too many sops (max=%d, nsops=%d)\n", seminfo.semopm, 1116 nsops)); 1117 return (E2BIG); 1118 } else { 1119#ifdef RACCT 1120 if (racct_enable) { 1121 
PROC_LOCK(td->td_proc); 1122 if (nsops > 1123 racct_get_available(td->td_proc, RACCT_NSEMOP)) { 1124 PROC_UNLOCK(td->td_proc); 1125 return (E2BIG); 1126 } 1127 PROC_UNLOCK(td->td_proc); 1128 } 1129#endif 1130 1131 sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK); 1132 } 1133 if ((error = copyin(uap->sops, sops, nsops * sizeof(sops[0]))) != 0) { 1134 DPRINTF(("error = %d from copyin(%p, %p, %d)\n", error, 1135 uap->sops, sops, nsops * sizeof(sops[0]))); 1136 if (sops != small_sops) 1137 free(sops, M_SEM); 1138 return (error); 1139 } 1140 1141 semakptr = &sema[semid]; 1142 sema_mtxp = &sema_mtx[semid]; 1143 mtx_lock(sema_mtxp); 1144 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) { 1145 error = EINVAL; 1146 goto done2; 1147 } 1148 seq = semakptr->u.sem_perm.seq; 1149 if (seq != IPCID_TO_SEQ(uap->semid)) { 1150 error = EINVAL; 1151 goto done2; 1152 } 1153 if ((error = sem_prison_cansee(rpr, semakptr)) != 0) 1154 goto done2; 1155 /* 1156 * Initial pass thru sops to see what permissions are needed. 1157 * Also perform any checks that don't need repeating on each 1158 * attempt to satisfy the request vector. 1159 */ 1160 j = 0; /* permission needed */ 1161 do_undos = 0; 1162 for (i = 0; i < nsops; i++) { 1163 sopptr = &sops[i]; 1164 if (sopptr->sem_num >= semakptr->u.sem_nsems) { 1165 error = EFBIG; 1166 goto done2; 1167 } 1168 if (sopptr->sem_flg & SEM_UNDO && sopptr->sem_op != 0) 1169 do_undos = 1; 1170 j |= (sopptr->sem_op == 0) ? SEM_R : SEM_A; 1171 } 1172 1173 if ((error = ipcperm(td, &semakptr->u.sem_perm, j))) { 1174 DPRINTF(("error = %d from ipaccess\n", error)); 1175 goto done2; 1176 } 1177#ifdef MAC 1178 error = mac_sysvsem_check_semop(td->td_ucred, semakptr, j); 1179 if (error != 0) 1180 goto done2; 1181#endif 1182 1183 /* 1184 * Loop trying to satisfy the vector of requests. 1185 * If we reach a point where we must wait, any requests already 1186 * performed are rolled back and we go to sleep until some other 1187 * process wakes us up. 
At this point, we start all over again. 1188 * 1189 * This ensures that from the perspective of other tasks, a set 1190 * of requests is atomic (never partially satisfied). 1191 */ 1192 for (;;) { 1193 do_wakeup = 0; 1194 error = 0; /* error return if necessary */ 1195 1196 for (i = 0; i < nsops; i++) { 1197 sopptr = &sops[i]; 1198 semptr = &semakptr->u.sem_base[sopptr->sem_num]; 1199 1200 DPRINTF(( 1201 "semop: semakptr=%p, sem_base=%p, " 1202 "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n", 1203 semakptr, semakptr->u.sem_base, semptr, 1204 sopptr->sem_num, semptr->semval, sopptr->sem_op, 1205 (sopptr->sem_flg & IPC_NOWAIT) ? 1206 "nowait" : "wait")); 1207 1208 if (sopptr->sem_op < 0) { 1209 if (semptr->semval + sopptr->sem_op < 0) { 1210 DPRINTF(("semop: can't do it now\n")); 1211 break; 1212 } else { 1213 semptr->semval += sopptr->sem_op; 1214 if (semptr->semval == 0 && 1215 semptr->semzcnt > 0) 1216 do_wakeup = 1; 1217 } 1218 } else if (sopptr->sem_op == 0) { 1219 if (semptr->semval != 0) { 1220 DPRINTF(("semop: not zero now\n")); 1221 break; 1222 } 1223 } else if (semptr->semval + sopptr->sem_op > 1224 seminfo.semvmx) { 1225 error = ERANGE; 1226 break; 1227 } else { 1228 if (semptr->semncnt > 0) 1229 do_wakeup = 1; 1230 semptr->semval += sopptr->sem_op; 1231 } 1232 } 1233 1234 /* 1235 * Did we get through the entire vector? 1236 */ 1237 if (i >= nsops) 1238 goto done; 1239 1240 /* 1241 * No ... rollback anything that we've already done 1242 */ 1243 DPRINTF(("semop: rollback 0 through %d\n", i-1)); 1244 for (j = 0; j < i; j++) 1245 semakptr->u.sem_base[sops[j].sem_num].semval -= 1246 sops[j].sem_op; 1247 1248 /* If we detected an error, return it */ 1249 if (error != 0) 1250 goto done2; 1251 1252 /* 1253 * If the request that we couldn't satisfy has the 1254 * NOWAIT flag set then return with EAGAIN. 
1255 */ 1256 if (sopptr->sem_flg & IPC_NOWAIT) { 1257 error = EAGAIN; 1258 goto done2; 1259 } 1260 1261 if (sopptr->sem_op == 0) 1262 semptr->semzcnt++; 1263 else 1264 semptr->semncnt++; 1265 1266 DPRINTF(("semop: good night!\n")); 1267 error = msleep(semakptr, sema_mtxp, (PZERO - 4) | PCATCH, 1268 "semwait", 0); 1269 DPRINTF(("semop: good morning (error=%d)!\n", error)); 1270 /* return code is checked below, after sem[nz]cnt-- */ 1271 1272 /* 1273 * Make sure that the semaphore still exists 1274 */ 1275 seq = semakptr->u.sem_perm.seq; 1276 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || 1277 seq != IPCID_TO_SEQ(uap->semid)) { 1278 error = EIDRM; 1279 goto done2; 1280 } 1281 1282 /* 1283 * Renew the semaphore's pointer after wakeup since 1284 * during msleep sem_base may have been modified and semptr 1285 * is not valid any more 1286 */ 1287 semptr = &semakptr->u.sem_base[sopptr->sem_num]; 1288 1289 /* 1290 * The semaphore is still alive. Readjust the count of 1291 * waiting processes. 1292 */ 1293 if (sopptr->sem_op == 0) 1294 semptr->semzcnt--; 1295 else 1296 semptr->semncnt--; 1297 1298 /* 1299 * Is it really morning, or was our sleep interrupted? 1300 * (Delayed check of msleep() return code because we 1301 * need to decrement sem[nz]cnt either way.) 1302 */ 1303 if (error != 0) { 1304 error = EINTR; 1305 goto done2; 1306 } 1307 DPRINTF(("semop: good morning!\n")); 1308 } 1309 1310done: 1311 /* 1312 * Process any SEM_UNDO requests. 1313 */ 1314 if (do_undos) { 1315 SEMUNDO_LOCK(); 1316 suptr = NULL; 1317 for (i = 0; i < nsops; i++) { 1318 /* 1319 * We only need to deal with SEM_UNDO's for non-zero 1320 * op's. 1321 */ 1322 int adjval; 1323 1324 if ((sops[i].sem_flg & SEM_UNDO) == 0) 1325 continue; 1326 adjval = sops[i].sem_op; 1327 if (adjval == 0) 1328 continue; 1329 error = semundo_adjust(td, &suptr, semid, seq, 1330 sops[i].sem_num, -adjval); 1331 if (error == 0) 1332 continue; 1333 1334 /* 1335 * Oh-Oh! We ran out of either sem_undo's or undo's. 
1336 * Rollback the adjustments to this point and then 1337 * rollback the semaphore ups and down so we can return 1338 * with an error with all structures restored. We 1339 * rollback the undo's in the exact reverse order that 1340 * we applied them. This guarantees that we won't run 1341 * out of space as we roll things back out. 1342 */ 1343 for (j = 0; j < i; j++) { 1344 k = i - j - 1; 1345 if ((sops[k].sem_flg & SEM_UNDO) == 0) 1346 continue; 1347 adjval = sops[k].sem_op; 1348 if (adjval == 0) 1349 continue; 1350 if (semundo_adjust(td, &suptr, semid, seq, 1351 sops[k].sem_num, adjval) != 0) 1352 panic("semop - can't undo undos"); 1353 } 1354 1355 for (j = 0; j < nsops; j++) 1356 semakptr->u.sem_base[sops[j].sem_num].semval -= 1357 sops[j].sem_op; 1358 1359 DPRINTF(("error = %d from semundo_adjust\n", error)); 1360 SEMUNDO_UNLOCK(); 1361 goto done2; 1362 } /* loop through the sops */ 1363 SEMUNDO_UNLOCK(); 1364 } /* if (do_undos) */ 1365 1366 /* We're definitely done - set the sempid's and time */ 1367 for (i = 0; i < nsops; i++) { 1368 sopptr = &sops[i]; 1369 semptr = &semakptr->u.sem_base[sopptr->sem_num]; 1370 semptr->sempid = td->td_proc->p_pid; 1371 } 1372 semakptr->u.sem_otime = time_second; 1373 1374 /* 1375 * Do a wakeup if any semaphore was up'd whilst something was 1376 * sleeping on it. 1377 */ 1378 if (do_wakeup) { 1379 DPRINTF(("semop: doing wakeup\n")); 1380 wakeup(semakptr); 1381 DPRINTF(("semop: back from wakeup\n")); 1382 } 1383 DPRINTF(("semop: done\n")); 1384 td->td_retval[0] = 0; 1385done2: 1386 mtx_unlock(sema_mtxp); 1387 if (sops != small_sops) 1388 free(sops, M_SEM); 1389 return (error); 1390} 1391 1392/* 1393 * Go through the undo structures for this process and apply the adjustments to 1394 * semaphores. 
/*
 * Go through the undo structures for this process and apply the adjustments to
 * semaphores.
 *
 * arg is unused; p is the process whose undo vector is looked up.  The
 * vector is unlinked from semu_list, each recorded adjustment is applied
 * to its semaphore (skipping sets that are no longer allocated or whose
 * sequence number has changed), and the vector is then returned to
 * semu_free_list.
 */
static void
semexit_myhook(void *arg, struct proc *p)
{
	struct sem_undo *suptr;
	struct semid_kernel *semakptr;
	struct mtx *sema_mtxp;
	int semid, semnum, adjval, ix;
	unsigned short seq;

	/*
	 * Go through the chain of undo vectors looking for one
	 * associated with this process.
	 */
	SEMUNDO_LOCK();
	LIST_FOREACH(suptr, &semu_list, un_next) {
		if (suptr->un_proc == p)
			break;
	}
	if (suptr == NULL) {
		/* This process never registered any undo entries. */
		SEMUNDO_UNLOCK();
		return;
	}
	LIST_REMOVE(suptr, un_next);

	DPRINTF(("proc @%p has undo structure with %d entries\n", p,
	    suptr->un_cnt));

	/*
	 * If there are any active undo elements then process them.
	 */
	if (suptr->un_cnt > 0) {
		/*
		 * The undo lock is dropped while the per-set mutexes are
		 * taken; the vector has already been unlinked from
		 * semu_list above.
		 */
		SEMUNDO_UNLOCK();
		for (ix = 0; ix < suptr->un_cnt; ix++) {
			semid = suptr->un_ent[ix].un_id;
			semnum = suptr->un_ent[ix].un_num;
			adjval = suptr->un_ent[ix].un_adjval;
			seq = suptr->un_ent[ix].un_seq;
			semakptr = &sema[semid];
			sema_mtxp = &sema_mtx[semid];

			mtx_lock(sema_mtxp);
			/* Skip entries whose set was freed or recycled. */
			if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
			    (semakptr->u.sem_perm.seq != seq)) {
				mtx_unlock(sema_mtxp);
				continue;
			}
			if (semnum >= semakptr->u.sem_nsems)
				panic("semexit - semnum out of range");

			DPRINTF((
			    "semexit:  %p id=%d num=%d(adj=%d) ; sem=%d\n",
			    suptr->un_proc, suptr->un_ent[ix].un_id,
			    suptr->un_ent[ix].un_num,
			    suptr->un_ent[ix].un_adjval,
			    semakptr->u.sem_base[semnum].semval));

			/* Clamp at zero rather than letting semval go negative. */
			if (adjval < 0 && semakptr->u.sem_base[semnum].semval <
			    -adjval)
				semakptr->u.sem_base[semnum].semval = 0;
			else
				semakptr->u.sem_base[semnum].semval += adjval;

			/* The value may have changed: wake sleepers on this set. */
			wakeup(semakptr);
			DPRINTF(("semexit:  back from wakeup\n"));
			mtx_unlock(sema_mtxp);
		}
		SEMUNDO_LOCK();
	}

	/*
	 * Deallocate the undo vector.
	 */
	DPRINTF(("removing vector\n"));
	suptr->un_proc = NULL;
	suptr->un_cnt = 0;
	LIST_INSERT_HEAD(&semu_free_list, suptr, un_next);
	SEMUNDO_UNLOCK();
}

/*
 * Sysctl handler that writes out one struct semid_kernel per slot
 * (seminfo.semmni records in total).  A slot is reported as all zeroes
 * when it is not allocated, when the caller has no semaphore prison root,
 * or when sem_prison_cansee() rejects it.  For visible sets created from a
 * different prison than the caller's, the key is reported as IPC_PRIVATE.
 *
 * Returns 0, or the first error from SYSCTL_OUT().
 */
static int
sysctl_sema(SYSCTL_HANDLER_ARGS)
{
	struct prison *pr, *rpr;
	struct semid_kernel tsemak;
	int error, i;

	pr = req->td->td_ucred->cr_prison;
	rpr = sem_find_prison(req->td->td_ucred);
	error = 0;
	for (i = 0; i < seminfo.semmni; i++) {
		/* Snapshot the slot under its mutex; copy out after unlocking. */
		mtx_lock(&sema_mtx[i]);
		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) == 0 ||
		    rpr == NULL || sem_prison_cansee(rpr, &sema[i]) != 0)
			bzero(&tsemak, sizeof(tsemak));
		else {
			tsemak = sema[i];
			if (tsemak.cred->cr_prison != pr)
				tsemak.u.sem_perm.key = IPC_PRIVATE;
		}
		mtx_unlock(&sema_mtx[i]);
		error = SYSCTL_OUT(req, &tsemak, sizeof(tsemak));
		if (error != 0)
			break;
	}
	return (error);
}

/*
 * Validate the "sysvsem" option, if present, in the option list "data"
 * for prison "obj".
 *
 * Returns 0 when the option is absent or acceptable, the vfs_copyopt()
 * error when it cannot be read, EPERM when "new" or "inherit" is requested
 * but the parent prison has no semaphore root, and EINVAL for any other
 * value.
 */
static int
sem_prison_check(void *obj, void *data)
{
	struct prison *pr = obj;
	struct prison *prpr;
	struct vfsoptlist *opts = data;
	int error, jsys;

	/*
	 * sysvsem is a jailsys integer.
	 * It must be "disable" if the parent jail is disabled.
	 */
	error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys));
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		switch (jsys) {
		case JAIL_SYS_DISABLE:
			break;
		case JAIL_SYS_NEW:
		case JAIL_SYS_INHERIT:
			/* Only allowed when the parent itself has a root. */
			prison_lock(pr->pr_parent);
			prpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
			prison_unlock(pr->pr_parent);
			if (prpr == NULL)
				return (EPERM);
			break;
		default:
			return (EINVAL);
		}
	}

	return (0);
}
/*
 * Apply the "sysvsem" setting (or, when it is absent, the legacy
 * "allow.sysvipc" / "allow.nosysvipc" flags) from option list "data" to
 * prison "obj" by updating the value stored in its sem_prison_slot OSD
 * slot.  The slot holds the prison acting as the root of this jail's
 * semaphores, or nothing when the feature is disabled.
 *
 * When the root changes, semaphores owned by a prison that was its own
 * root are removed via sem_prison_cleanup(), and descendants are walked
 * and updated to match.  Always returns 0.
 */
static int
sem_prison_set(void *obj, void *data)
{
	struct prison *pr = obj;
	struct prison *tpr, *orpr, *nrpr, *trpr;
	struct vfsoptlist *opts = data;
	void *rsv;
	int jsys, descend;

	/*
	 * sysvsem controls which jail is the root of the associated sems (this
	 * jail or same as the parent), or if the feature is available at all.
	 */
	if (vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)) == ENOENT)
		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
		    ? JAIL_SYS_INHERIT
		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
		    ? JAIL_SYS_DISABLE
		    : -1;	/* -1: nothing requested, leave state alone */
	if (jsys == JAIL_SYS_DISABLE) {
		/* Drop this prison's root, remembering what it used to be. */
		prison_lock(pr);
		orpr = osd_jail_get(pr, sem_prison_slot);
		if (orpr != NULL)
			osd_jail_del(pr, sem_prison_slot);
		prison_unlock(pr);
		if (orpr != NULL) {
			if (orpr == pr)
				sem_prison_cleanup(pr);
			/* Disable all child jails as well. */
			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
				prison_lock(tpr);
				trpr = osd_jail_get(tpr, sem_prison_slot);
				if (trpr != NULL) {
					osd_jail_del(tpr, sem_prison_slot);
					prison_unlock(tpr);
					if (trpr == tpr)
						sem_prison_cleanup(tpr);
				} else {
					prison_unlock(tpr);
					/*
					 * NOTE(review): presumably tells the
					 * iterator not to visit this child's
					 * own descendants -- confirm against
					 * FOREACH_PRISON_DESCENDANT.
					 */
					descend = 0;
				}
			}
		}
	} else if (jsys != -1) {
		/* Work out the new root: this prison, or the parent's root. */
		if (jsys == JAIL_SYS_NEW)
			nrpr = pr;
		else {
			prison_lock(pr->pr_parent);
			nrpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
			prison_unlock(pr->pr_parent);
		}
		/* Reserve the OSD slot before the prison lock is taken. */
		rsv = osd_reserve(sem_prison_slot);
		prison_lock(pr);
		orpr = osd_jail_get(pr, sem_prison_slot);
		if (orpr != nrpr)
			(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
			    nrpr);
		else
			osd_free_reserved(rsv);
		prison_unlock(pr);
		if (orpr != nrpr) {
			if (orpr == pr)
				sem_prison_cleanup(pr);
			if (orpr != NULL) {
				/* Change child jails matching the old root. */
				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
					prison_lock(tpr);
					trpr = osd_jail_get(tpr,
					    sem_prison_slot);
					if (trpr == orpr) {
						(void)osd_jail_set(tpr,
						    sem_prison_slot, nrpr);
						prison_unlock(tpr);
						if (trpr == tpr)
							sem_prison_cleanup(tpr);
					} else {
						prison_unlock(tpr);
						descend = 0;
					}
				}
			}
		}
	}

	return (0);
}

/*
 * Report prison "obj"'s current setting through the "sysvsem" option in
 * "data": JAIL_SYS_DISABLE when its OSD slot is empty, JAIL_SYS_NEW when
 * the prison is its own root, JAIL_SYS_INHERIT otherwise.
 *
 * Returns the vfs_setopt() result, except that ENOENT is mapped to 0.
 */
static int
sem_prison_get(void *obj, void *data)
{
	struct prison *pr = obj;
	struct prison *rpr;
	struct vfsoptlist *opts = data;
	int error, jsys;

	/* Set sysvsem based on the jail's root prison. */
	prison_lock(pr);
	rpr = osd_jail_get(pr, sem_prison_slot);
	prison_unlock(pr);
	jsys = rpr == NULL ? JAIL_SYS_DISABLE
	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
	error = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys));
	if (error == ENOENT)
		error = 0;
	return (error);
}
JAIL_SYS_NEW : JAIL_SYS_INHERIT; 1639 error = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys)); 1640 if (error == ENOENT) 1641 error = 0; 1642 return (error); 1643} 1644 1645static int 1646sem_prison_remove(void *obj, void *data __unused) 1647{ 1648 struct prison *pr = obj; 1649 struct prison *rpr; 1650 1651 prison_lock(pr); 1652 rpr = osd_jail_get(pr, sem_prison_slot); 1653 prison_unlock(pr); 1654 if (rpr == pr) 1655 sem_prison_cleanup(pr); 1656 return (0); 1657} 1658 1659static void 1660sem_prison_cleanup(struct prison *pr) 1661{ 1662 int i; 1663 1664 /* Remove any sems that belong to this jail. */ 1665 mtx_lock(&sem_mtx); 1666 for (i = 0; i < seminfo.semmni; i++) { 1667 if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && 1668 sema[i].cred != NULL && sema[i].cred->cr_prison == pr) { 1669 mtx_lock(&sema_mtx[i]); 1670 sem_remove(i, NULL); 1671 mtx_unlock(&sema_mtx[i]); 1672 } 1673 } 1674 mtx_unlock(&sem_mtx); 1675} 1676 1677SYSCTL_JAIL_PARAM_SYS_NODE(sysvsem, CTLFLAG_RW, "SYSV semaphores"); 1678 1679#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1680 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1681 1682/* XXX casting to (sy_call_t *) is bogus, as usual. */ 1683static sy_call_t *semcalls[] = { 1684 (sy_call_t *)freebsd7___semctl, (sy_call_t *)sys_semget, 1685 (sy_call_t *)sys_semop 1686}; 1687 1688/* 1689 * Entry point for all SEM calls. 1690 */ 1691int 1692sys_semsys(td, uap) 1693 struct thread *td; 1694 /* XXX actually varargs. 
*/ 1695 struct semsys_args /* { 1696 int which; 1697 int a2; 1698 int a3; 1699 int a4; 1700 int a5; 1701 } */ *uap; 1702{ 1703 int error; 1704 1705 if (uap->which < 0 || 1706 uap->which >= sizeof(semcalls)/sizeof(semcalls[0])) 1707 return (EINVAL); 1708 error = (*semcalls[uap->which])(td, &uap->a2); 1709 return (error); 1710} 1711 1712#ifndef CP 1713#define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) 1714#endif 1715 1716#ifndef _SYS_SYSPROTO_H_ 1717struct freebsd7___semctl_args { 1718 int semid; 1719 int semnum; 1720 int cmd; 1721 union semun_old *arg; 1722}; 1723#endif 1724int 1725freebsd7___semctl(struct thread *td, struct freebsd7___semctl_args *uap) 1726{ 1727 struct semid_ds_old dsold; 1728 struct semid_ds dsbuf; 1729 union semun_old arg; 1730 union semun semun; 1731 register_t rval; 1732 int error; 1733 1734 switch (uap->cmd) { 1735 case SEM_STAT: 1736 case IPC_SET: 1737 case IPC_STAT: 1738 case GETALL: 1739 case SETVAL: 1740 case SETALL: 1741 error = copyin(uap->arg, &arg, sizeof(arg)); 1742 if (error) 1743 return (error); 1744 break; 1745 } 1746 1747 switch (uap->cmd) { 1748 case SEM_STAT: 1749 case IPC_STAT: 1750 semun.buf = &dsbuf; 1751 break; 1752 case IPC_SET: 1753 error = copyin(arg.buf, &dsold, sizeof(dsold)); 1754 if (error) 1755 return (error); 1756 ipcperm_old2new(&dsold.sem_perm, &dsbuf.sem_perm); 1757 CP(dsold, dsbuf, sem_base); 1758 CP(dsold, dsbuf, sem_nsems); 1759 CP(dsold, dsbuf, sem_otime); 1760 CP(dsold, dsbuf, sem_ctime); 1761 semun.buf = &dsbuf; 1762 break; 1763 case GETALL: 1764 case SETALL: 1765 semun.array = arg.array; 1766 break; 1767 case SETVAL: 1768 semun.val = arg.val; 1769 break; 1770 } 1771 1772 error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, 1773 &rval); 1774 if (error) 1775 return (error); 1776 1777 switch (uap->cmd) { 1778 case SEM_STAT: 1779 case IPC_STAT: 1780 bzero(&dsold, sizeof(dsold)); 1781 ipcperm_new2old(&dsbuf.sem_perm, &dsold.sem_perm); 1782 CP(dsbuf, dsold, sem_base); 1783 CP(dsbuf, 
#ifdef COMPAT_FREEBSD32

/*
 * 32-bit semsys() multiplexer.  Sub-call 0 (semctl) goes to the 32-bit
 * FreeBSD 7 semctl shim; every other sub-call is forwarded to
 * sys_semsys().  Without any COMPAT_FREEBSD{4,5,6,7} option the call is
 * answered by nosys().
 */
int
freebsd32_semsys(struct thread *td, struct freebsd32_semsys_args *uap)
{

#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	if (uap->which == 0) {
		return (freebsd7_freebsd32_semctl(td,
		    (struct freebsd7_freebsd32_semctl_args *)&uap->a2));
	}
	return (sys_semsys(td, (struct semsys_args *)uap));
#else
	return (nosys(td, NULL));
#endif
}

#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
/*
 * FreeBSD 7 semctl() for 32-bit processes: converts semun32 and
 * semid_ds32_old to their native forms, calls kern_semctl(), and converts
 * stat results back.  On success td_retval[0] receives kern_semctl()'s
 * result value.
 */
int
freebsd7_freebsd32_semctl(struct thread *td,
    struct freebsd7_freebsd32_semctl_args *uap)
{
	struct semid_ds32_old dsbuf32;
	struct semid_ds dsbuf;
	union semun semun;
	union semun32 arg;
	register_t rval;
	int error;
	const int cmd = uap->cmd;
	const int is_stat = (cmd == SEM_STAT || cmd == IPC_STAT);

	/* Fetch the 32-bit semun for the commands that take one. */
	if (is_stat || cmd == IPC_SET || cmd == GETALL || cmd == SETVAL ||
	    cmd == SETALL) {
		error = copyin(uap->arg, &arg, sizeof(arg));
		if (error != 0)
			return (error);
	}

	/* Translate the argument into the native semun. */
	if (is_stat) {
		semun.buf = &dsbuf;
	} else if (cmd == IPC_SET) {
		error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32));
		if (error != 0)
			return (error);
		freebsd32_ipcperm_old_in(&dsbuf32.sem_perm, &dsbuf.sem_perm);
		PTRIN_CP(dsbuf32, dsbuf, sem_base);
		CP(dsbuf32, dsbuf, sem_nsems);
		CP(dsbuf32, dsbuf, sem_otime);
		CP(dsbuf32, dsbuf, sem_ctime);
		semun.buf = &dsbuf;
	} else if (cmd == GETALL || cmd == SETALL) {
		semun.array = PTRIN(arg.array);
	} else if (cmd == SETVAL) {
		semun.val = arg.val;
	}

	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
	    &rval);
	if (error != 0)
		return (error);

	/* Stat commands hand the result back in the old 32-bit layout. */
	if (is_stat) {
		bzero(&dsbuf32, sizeof(dsbuf32));
		freebsd32_ipcperm_old_out(&dsbuf.sem_perm, &dsbuf32.sem_perm);
		PTROUT_CP(dsbuf, dsbuf32, sem_base);
		CP(dsbuf, dsbuf32, sem_nsems);
		CP(dsbuf, dsbuf32, sem_otime);
		CP(dsbuf, dsbuf32, sem_ctime);
		error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32));
		if (error != 0)
			return (error);
	}

	td->td_retval[0] = rval;
	return (0);
}
#endif

/*
 * semctl() for 32-bit processes: converts semun32 and semid_ds32 to their
 * native forms, calls kern_semctl(), and converts stat results back.  On
 * success td_retval[0] receives kern_semctl()'s result value.
 */
int
freebsd32_semctl(struct thread *td, struct freebsd32_semctl_args *uap)
{
	struct semid_ds32 dsbuf32;
	struct semid_ds dsbuf;
	union semun semun;
	union semun32 arg;
	register_t rval;
	int error;
	const int cmd = uap->cmd;
	const int is_stat = (cmd == SEM_STAT || cmd == IPC_STAT);

	/* Fetch the 32-bit semun for the commands that take one. */
	if (is_stat || cmd == IPC_SET || cmd == GETALL || cmd == SETVAL ||
	    cmd == SETALL) {
		error = copyin(uap->arg, &arg, sizeof(arg));
		if (error != 0)
			return (error);
	}

	/* Translate the argument into the native semun. */
	if (is_stat) {
		semun.buf = &dsbuf;
	} else if (cmd == IPC_SET) {
		error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32));
		if (error != 0)
			return (error);
		freebsd32_ipcperm_in(&dsbuf32.sem_perm, &dsbuf.sem_perm);
		PTRIN_CP(dsbuf32, dsbuf, sem_base);
		CP(dsbuf32, dsbuf, sem_nsems);
		CP(dsbuf32, dsbuf, sem_otime);
		CP(dsbuf32, dsbuf, sem_ctime);
		semun.buf = &dsbuf;
	} else if (cmd == GETALL || cmd == SETALL) {
		semun.array = PTRIN(arg.array);
	} else if (cmd == SETVAL) {
		semun.val = arg.val;
	}

	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
	    &rval);
	if (error != 0)
		return (error);

	/* Stat commands hand the result back in the 32-bit layout. */
	if (is_stat) {
		bzero(&dsbuf32, sizeof(dsbuf32));
		freebsd32_ipcperm_out(&dsbuf.sem_perm, &dsbuf32.sem_perm);
		PTROUT_CP(dsbuf, dsbuf32, sem_base);
		CP(dsbuf, dsbuf32, sem_nsems);
		CP(dsbuf, dsbuf32, sem_otime);
		CP(dsbuf, dsbuf32, sem_ctime);
		error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32));
		if (error != 0)
			return (error);
	}

	td->td_retval[0] = rval;
	return (0);
}

#endif /* COMPAT_FREEBSD32 */