1/* 2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 67 */ 68/* 69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce 70 * support for mandatory and extensible security protections. 
This notice 71 * is included in support of clause 2.2 (b) of the Apple Public License, 72 * Version 2.0. 73 */ 74 75#include <sys/param.h> 76#include <sys/systm.h> 77#include <sys/filedesc.h> 78#include <sys/ioctl.h> 79#include <sys/file_internal.h> 80#include <sys/proc_internal.h> 81#include <sys/socketvar.h> 82#include <sys/uio_internal.h> 83#include <sys/kernel.h> 84#include <sys/stat.h> 85#include <sys/malloc.h> 86#include <sys/sysproto.h> 87 88#include <sys/mount_internal.h> 89#include <sys/protosw.h> 90#include <sys/ev.h> 91#include <sys/user.h> 92#include <sys/kdebug.h> 93#include <sys/poll.h> 94#include <sys/event.h> 95#include <sys/eventvar.h> 96#include <sys/proc.h> 97 98#include <mach/mach_types.h> 99#include <kern/kern_types.h> 100#include <kern/assert.h> 101#include <kern/kalloc.h> 102#include <kern/thread.h> 103#include <kern/clock.h> 104#include <kern/ledger.h> 105#include <kern/task.h> 106 107#include <sys/mbuf.h> 108#include <sys/socket.h> 109#include <sys/socketvar.h> 110#include <sys/errno.h> 111#include <sys/syscall.h> 112#include <sys/pipe.h> 113 114#include <security/audit/audit.h> 115 116#include <net/if.h> 117#include <net/route.h> 118 119#include <netinet/in.h> 120#include <netinet/in_systm.h> 121#include <netinet/ip.h> 122#include <netinet/in_pcb.h> 123#include <netinet/ip_var.h> 124#include <netinet/ip6.h> 125#include <netinet/tcp.h> 126#include <netinet/tcp_fsm.h> 127#include <netinet/tcp_seq.h> 128#include <netinet/tcp_timer.h> 129#include <netinet/tcp_var.h> 130#include <netinet/tcpip.h> 131#include <netinet/tcp_debug.h> 132/* for wait queue based select */ 133#include <kern/wait_queue.h> 134#include <kern/kalloc.h> 135#include <sys/vnode_internal.h> 136 137/* XXX should be in a header file somewhere */ 138void evsofree(struct socket *); 139void evpipefree(struct pipe *); 140void postpipeevent(struct pipe *, int); 141void postevent(struct socket *, struct sockbuf *, int); 142extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t 
uuid, mach_timespec_t timeoutp);

/* uio-based helpers shared by readv()/writev() (defined later in this file) */
int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
extern void *get_bsduthreadarg(thread_t);
extern int *get_bsduthreadrval(thread_t);

__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
				 user_addr_t bufp, user_size_t nbyte,
				 off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
				 user_addr_t bufp, user_size_t nbyte,
				 off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);


/* Conflict wait queue for when selects collide (opaque type) */
struct wait_queue select_conflict_queue;

/*
 * Init routine called from bsd_init.c
 */
void select_wait_queue_init(void);
void
select_wait_queue_init(void)
{
	/* FIFO policy: threads that collided on a select wait are woken in order */
	wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO);
}


#if NETAT
extern int appletalk_inited;
#endif /* NETAT */

/*
 * Shorthand accessors for fields of the fileglob shared by every
 * struct fileproc that references the same open file.
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * Read system call.
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
{
	/* Cancellation point: deliver any pending pthread cancel, then defer to the _nocancel variant */
	__pthread_testcancel(1);
	return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
}

/*
 * Non-cancellable core of read(): look up and validate the fd, read at the
 * current file offset (offset -1, no FOF_OFFSET), then release the fileproc.
 */
int
read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	struct vfs_context context;

	if ( (error = preparefileread(p, &fp, fd, 0)) )
		return (error);

	/* I/O is charged to the credential the file was opened with, not the caller's current one */
	context = *(vfs_context_current());
	context.vc_ucred = fp->f_fglob->fg_cred;

	error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
			(off_t)-1, 0, retval);

	donefileread(p, fp, fd);

	return (error);
}

/*
 * Pread system call
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
{
	/* Cancellation point, then defer to the _nocancel variant */
	__pthread_testcancel(1);
	return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
}

/*
 * Non-cancellable core of pread(): like read_nocancel() but reads at an
 * explicit offset (FOF_OFFSET) and refuses non-seekable objects
 * (check_for_pread = 1 in preparefileread()).
 */
int
pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp = NULL;	/* fp set by preparefileread() */
	int fd = uap->fd;
	int error;
	struct vfs_context context;

	if ( (error = preparefileread(p, &fp, fd, 1)) )
		goto out;

	context = *(vfs_context_current());
	context.vc_ucred = fp->f_fglob->fg_cred;

	error = dofileread(&context, fp, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);

	donefileread(p, fp, fd);

	/* Trace the extended syscall info; offset is split into two 32-bit halves */
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
	      uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

out:
	return (error);
}

/*
 * Code common for read and pread
 */

/*
 * Release the fileproc reference taken by preparefileread() and clear the
 * "blocked in character-device read" flag under the fd spinlock.
 */
void
donefileread(struct proc *p, struct fileproc *fp, int fd)
{
	proc_fdlock_spin(p);

	fp->f_flags &= ~FP_INCHRREAD;

	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
}

/*
 * Look up fd and validate it for reading; on success *fp_ret holds a
 * referenced fileproc which the caller must release via donefileread().
 * When check_for_pread is set, additionally enforce pread() semantics:
 * only seekable vnodes are allowed (no sockets/pipes/fifos/ttys).
 *
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *	fp_lookup:EBADF
 *	fo_read:???
 */
int
preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
{
	vnode_t vp;
	int error;
	struct fileproc *fp;

	AUDIT_ARG(fd, fd);

	proc_fdlock_spin(p);

	/* fp_lookup with locked==1: caller already holds the fd lock */
	error = fp_lookup(p, fd, &fp, 1);

	if (error) {
		proc_fdunlock(p);
		return (error);
	}
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
		error = ESPIPE;
		goto out;
	}
	if (fp->f_type == DTYPE_VNODE) {
		vp = (struct vnode *)fp->f_fglob->fg_data;

		/* fifos masquerade as vnodes but are not seekable */
		if (check_for_pread && (vnode_isfifo(vp))) {
			error = ESPIPE;
			goto out;
		}
		if (check_for_pread && (vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto out;
		}
		/* mark reads on character devices so they can be interrupted (cleared in donefileread) */
		if (vp->v_type == VCHR)
			fp->f_flags |= FP_INCHRREAD;
	}

	*fp_ret = fp;

	proc_fdunlock(p);
	return (0);

out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return (error);
}


/*
 * Returns:	0	Success
 *		EINVAL
 *	fo_read:???
 */
/*
 * Common single-buffer read path for read()/pread(): builds a one-iovec uio
 * on the stack and calls the fileops read entry point.  A partially
 * completed transfer suppresses ERESTART/EINTR/EWOULDBLOCK so the caller
 * sees the short count instead of an error.
 */
__private_extern__ int
dofileread(vfs_context_t ctx, struct fileproc *fp,
	   user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	   user_ssize_t *retval)
{
	uio_t auio;
	user_ssize_t bytecnt;
	long error = 0;
	char uio_buf[ UIO_SIZEOF(1) ];		/* backing store for a 1-iovec uio; no heap allocation */

	if (nbyte > INT_MAX)
		return (EINVAL);

	/* address-space flavor must match the calling process's word size */
	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
					    &uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
					    &uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;

	if ((error = fo_read(fp, auio, flags, ctx))) {
		/* partial transfer before an interrupt/restart: report the bytes, not the error */
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	bytecnt -= uio_resid(auio);

	*retval = bytecnt;

	return (error);
}

/*
 * Scatter read system call.
 *
 * Returns:	0			Success
 *		EINVAL
 *		ENOMEM
 *	copyin:EFAULT
 *	rd_uio:???
 */
int
readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
{
	/* Cancellation point, then defer to the _nocancel variant */
	__pthread_testcancel(1);
	return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
}

/*
 * Non-cancellable core of readv(): allocate a uio sized for iovcnt entries,
 * copy the iovec array in from user space, then hand off to rd_uio().
 */
int
readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	struct user_iovec *iovp;

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			  UIO_READ);

	/* get location of iovecs within the uio.  then copyin the iovecs from
	 * user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	error = copyin_user_iovec_array(uap->iovp,
		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
		uap->iovcnt, iovp);
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	uio_calculateresid(auio);
	error = rd_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}

/*
 * Write system call
 *
 * Returns:	0			Success
 *		EBADF
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
{
	/* Cancellation point, then defer to the _nocancel variant */
	__pthread_testcancel(1);
	return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));

}

/*
 * Non-cancellable core of write(): validate the fd for writing, write at
 * the current file offset, and drop the fileproc reference — via
 * fp_drop_written() on success so write accounting is updated.
 */
int
write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;

	AUDIT_ARG(fd, fd);

	error = fp_lookup(p,fd,&fp,0);
	if (error)
		return(error);
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else {
		/* charge the I/O to the open-time credential */
		struct vfs_context context = *(vfs_context_current());
		context.vc_ucred = fp->f_fglob->fg_cred;

		error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
			(off_t)-1, 0, retval);
	}
	if (error == 0)
	        fp_drop_written(p, fd, fp);
	else
	        fp_drop(p, fd, fp, 0);
	return(error);
}

/*
 * pwrite system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *		EINVAL
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
{
	/* Cancellation point, then defer to the _nocancel variant */
	__pthread_testcancel(1);
	return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
}

/*
 * Non-cancellable core of pwrite(): like write_nocancel() but writes at an
 * explicit offset (FOF_OFFSET) and enforces pwrite() semantics inline:
 * only seekable vnodes (no sockets/pipes/fifos/ttys) and a non-negative
 * offset are accepted.
 */
int
pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	vnode_t vp = (vnode_t)0;

	AUDIT_ARG(fd, fd);

	error = fp_lookup(p,fd,&fp,0);
	if (error)
		return(error);

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else {
		struct vfs_context context = *vfs_context_current();
		context.vc_ucred = fp->f_fglob->fg_cred;

		if (fp->f_type != DTYPE_VNODE) {
			error = ESPIPE;
			goto errout;
		}
		vp = (vnode_t)fp->f_fglob->fg_data;
		/* fifos masquerade as vnodes but are not seekable */
		if (vnode_isfifo(vp)) {
			error = ESPIPE;
			goto errout;
		}
		if ((vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto errout;
		}
		if (uap->offset == (off_t)-1) {
			error = EINVAL;
			goto errout;
		}

		error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);
	}
errout:
	if (error == 0)
	        fp_drop_written(p, fd, fp);
	else
	        fp_drop(p, fd, fp, 0);

	/* Trace the extended syscall info; offset is split into two 32-bit halves */
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
	      uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

	return(error);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *	<fo_write>:EPIPE
 *	<fo_write>:???
 *			[indirect through struct fileops]
 */
/*
 * Common single-buffer write path for write()/pwrite(): builds a one-iovec
 * uio on the stack and calls the fileops write entry point.  Short writes
 * interrupted by ERESTART/EINTR/EWOULDBLOCK are reported as success with a
 * short count; EPIPE raises SIGPIPE unless the descriptor is a socket
 * (which handles it itself) or FG_NOSIGPIPE is set.
 */
__private_extern__ int
dofilewrite(vfs_context_t ctx, struct fileproc *fp,
	    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	    user_ssize_t *retval)
{
	uio_t auio;
	long error = 0;
	user_ssize_t bytecnt;
	char uio_buf[ UIO_SIZEOF(1) ];		/* backing store for a 1-iovec uio; no heap allocation */

	if (nbyte > INT_MAX)
		return (EINVAL);

	/* address-space flavor must match the calling process's word size */
	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
					    &uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
					    &uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;
	if ((error = fo_write(fp, auio, flags, ctx))) {
		/* partial transfer before an interrupt/restart: report the bytes, not the error */
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
		    (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
			/* XXX Raise the signal on the thread? */
			psignal(vfs_context_proc(ctx), SIGPIPE);
		}
	}
	bytecnt -= uio_resid(auio);
	*retval = bytecnt;

	return (error);
}

/*
 * Gather write system call
 */
int
writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
{
	/* Cancellation point, then defer to the _nocancel variant */
	__pthread_testcancel(1);
	return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
}

/*
 * Non-cancellable core of writev(): allocate a uio sized for iovcnt
 * entries, copy the iovec array in from user space, then hand off to
 * wr_uio().
 */
int
writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	struct user_iovec *iovp;

	AUDIT_ARG(fd, uap->fd);

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			  UIO_WRITE);

	/* get location of iovecs within the uio.  then copyin the iovecs from
	 * user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	error = copyin_user_iovec_array(uap->iovp,
		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
		uap->iovcnt, iovp);
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	uio_calculateresid(auio);
	error = wr_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}


/*
 * Multi-iovec write worker used by writev(): same error/SIGPIPE semantics
 * as dofilewrite() but takes a caller-built uio.
 */
int
wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	error = fp_lookup(p,fdes,&fp,0);
	if (error)
		return(error);

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	count = uio_resid(uio);

	/* charge the I/O to the open-time credential */
	context.vc_ucred = fp->f_cred;
	error = fo_write(fp, uio, 0, &context);
	if (error) {
		/* partial transfer before an interrupt/restart: report the bytes, not the error */
		if (uio_resid(uio) != count && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
		    (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
			psignal(p, SIGPIPE);
	}
	*retval = count - uio_resid(uio);

out:
	if (error == 0)
		fp_drop_written(p, fdes, fp);
	else
		fp_drop(p, fdes, fp, 0);
	return(error);
}


/*
 * Multi-iovec read worker used by readv(): validates the fd via
 * preparefileread() and reports partial transfers as success.
 */
int
rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	if ( (error = preparefileread(p, &fp, fdes, 0)) )
		return (error);

	count = uio_resid(uio);

	/* charge the I/O to the open-time credential */
	context.vc_ucred = fp->f_cred;

	error = fo_read(fp, uio, 0, &context);

	if (error) {
		/* partial transfer before an interrupt/restart: report the bytes, not the error */
		if (uio_resid(uio) != count && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	*retval = count - uio_resid(uio);

	donefileread(p, fp, fdes);

	return (error);
}

/*
 * Ioctl system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ENOTTY
 *		ENOMEM
 *		ESRCH
 *	copyin:EFAULT
 *	copyout:EFAULT
 *	fp_lookup:EBADF			Bad file descriptor
 *	fo_ioctl:???
 */
int
ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
{
	struct fileproc *fp;
	u_long com;
	int error = 0;
	u_int size;
	caddr_t datap, memp;
	boolean_t is64bit;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];	/* small argument buffers avoid kalloc */
	int fd = uap->fd;
	struct vfs_context context = *vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(addr, uap->data);

	is64bit = proc_is64bit(p);
#if CONFIG_AUDIT
	if (is64bit)
		AUDIT_ARG(value64, uap->com);
	else
		AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, uap->com));
#endif /* CONFIG_AUDIT */

	proc_fdlock(p);
	error = fp_lookup(p,fd,&fp,1);
	if (error) {
		proc_fdunlock(p);
		return(error);
	}

	AUDIT_ARG(file, p, fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF
	error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
	if (error)
		goto out;
#endif

#if NETAT
	/*
	 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
	 * while implementing an ATioctl system call
	 */
	{
		if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
			u_long fixed_command;

#ifdef APPLETALK_DEBUG
			kprintf("ioctl: special AppleTalk \n");
#endif
			datap = &stkbuf[0];
			*(user_addr_t *)datap = uap->data;
			fixed_command = _IOW(0, 0xff99, uap->data);
			error = fo_ioctl(fp, fixed_command, datap, &context);
			goto out;
		}
	}

#endif /* NETAT */


	/* close-on-exec flag manipulation never reaches the fileops layer */
	switch (com = uap->com) {
	case FIONCLEX:
		*fdflags(p, uap->fd) &= ~UF_EXCLOSE;
		error =0;
		goto out;
	case FIOCLEX:
		*fdflags(p, uap->fd) |= UF_EXCLOSE;
		error =0;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		/* drop the fd lock around the (possibly blocking) allocation */
		proc_fdunlock(p);
		if ((memp = (caddr_t)kalloc(size)) == 0) {
			proc_fdlock(p);
			error = ENOMEM;
			goto out;
		}
		proc_fdlock(p);
		datap = memp;
	} else
		datap = &stkbuf[0];
	if (com&IOC_IN) {
		if (size) {
			/* drop the fd lock around copyin (may fault) */
			proc_fdunlock(p);
			error = copyin(uap->data, datap, size);
			if (error) {
				if (memp)
					kfree(memp, size);
				proc_fdlock(p);
				goto out;
			}
			proc_fdlock(p);
		} else {
			/* XXX - IOC_IN and no size? we should probably return an error here!! */
			if (is64bit) {
				*(user_addr_t *)datap = uap->data;
			}
			else {
				*(uint32_t *)datap = (uint32_t)uap->data;
			}
		}
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(datap, size);
	else if (com&IOC_VOID) {
		/* XXX - this is odd since IOC_VOID means no parameters */
		if (is64bit) {
			*(user_addr_t *)datap = uap->data;
		}
		else {
			*(uint32_t *)datap = (uint32_t)uap->data;
		}
	}

	switch (com) {

	case FIONBIO:
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
		break;

	case FIOASYNC:
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
		break;

	case FIOSETOWN:
		tmp = *(int *)datap;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (fp->f_type == DTYPE_PIPE) {
			error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
			break;
		}
		/* positive arg is a pid: translate to the process group id */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = proc_find(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrpid;
			proc_rele(p1);
		}
		error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
		*(int *)datap = -*(int *)datap;
		break;

	default:
		error = fo_ioctl(fp, com, datap, &context);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(datap, uap->data, (u_int)size);
		break;
	}
	/* drop the fd lock around kfree; out: expects it held */
	proc_fdunlock(p);
	if (memp)
		kfree(memp, size);
	proc_fdlock(p);
out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return(error);
}

int selwait, nselcoll;
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
			int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);

/*
 * Select system call.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAGAIN			Nonconformant error if allocation fails
 *	selprocess:???
 */
int
select(struct proc *p, struct select_args *uap, int32_t *retval)
{
	/* Cancellation point, then defer to the _nocancel variant */
	__pthread_testcancel(1);
	return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
}

/*
 * Non-cancellable core of select(): validates arguments, sizes and fills
 * the per-thread bit vectors (ibits/obits), converts the timeout to an
 * absolute deadline, counts fds needing wait-queue links, sizes the
 * per-thread wait queue set, then enters selprocess() for the first pass.
 */
int
select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
{
	int error = 0;
	u_int ni, nw, size;
	thread_t th_act;
	struct uthread *uth;
	struct _select *sel;
	int needzerofill = 1;
	int count = 0;

	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	/* return value is delivered through the uthread, not the caller's retval */
	retval = (int *)get_bsduthreadrval(th_act);
	*retval = 0;

	if (uap->nd < 0) {
		return (EINVAL);
	}

	/* select on thread of process that already called proc_exit() */
	if (p->p_fd == NULL) {
		return (EBADF);
	}

	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */

	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * if the previously allocated space for the bits is smaller than
	 * what is requested or no space has yet been allocated for this
	 * thread, allocate enough space now.
	 *
	 * Note: If this process fails, select() will return EAGAIN; this
	 * is the same thing poll() returns in a no-memory situation, but
	 * it is not a POSIX compliant error code for select().
	 */
	if (sel->nbytes < (3 * ni)) {
		int nbytes = 3 * ni;	/* three bit vectors: read, write, except */

		/* Free previous allocation, if any */
		if (sel->ibits != NULL)
			FREE(sel->ibits, M_TEMP);
		if (sel->obits != NULL) {
			FREE(sel->obits, M_TEMP);
			/* NULL out; subsequent ibits allocation may fail */
			sel->obits = NULL;
		}

		MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->ibits == NULL)
			return (EAGAIN);
		MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->obits == NULL) {
			FREE(sel->ibits, M_TEMP);
			sel->ibits = NULL;
			return (EAGAIN);
		}
		sel->nbytes = nbytes;
		needzerofill = 0;	/* M_ZERO already cleared the fresh buffers */
	}

	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 */
#define	getbits(name, x) \
	do { \
		if (uap->name && (error = copyin(uap->name, \
			(caddr_t)&sel->ibits[(x) * nw], ni))) \
			goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (uap->tv) {
		struct timeval atv;
		if (IS_64BIT_PROCESS(p)) {
			struct user64_timeval atv64;
			error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
			/* Loses resolution - assume timeout < 68 years */
			atv.tv_sec = atv64.tv_sec;
			atv.tv_usec = atv64.tv_usec;
		} else {
			struct user32_timeval atv32;
			error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
			atv.tv_sec = atv32.tv_sec;
			atv.tv_usec = atv32.tv_usec;
		}
		if (error)
			goto continuation;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto continuation;
		}

		/* convert the relative timeout into an absolute deadline */
		clock_absolutetime_interval_to_deadline(
			tvtoabstime(&atv), &sel->abstime);
	}
	else
		sel->abstime = 0;

	if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
		goto continuation;
	}

	/* size the per-thread wait queue set: one link per fd being waited on */
	sel->count = count;
	size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
	if (uth->uu_allocsize) {
		if (uth->uu_wqset == 0)
			panic("select: wql memory smashed");
		/* needed for the select now */
		if (size > uth->uu_allocsize) {
			kfree(uth->uu_wqset, uth->uu_allocsize);
			uth->uu_allocsize = size;
			uth->uu_wqset = (wait_queue_set_t)kalloc(size);
			if (uth->uu_wqset == (wait_queue_set_t)NULL)
				panic("failed to allocate memory for waitqueue\n");
		}
	} else {
		uth->uu_allocsize = size;
		uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
		if (uth->uu_wqset == (wait_queue_set_t)NULL)
			panic("failed to allocate memory for waitqueue\n");
	}
	bzero(uth->uu_wqset, size);
	/* wait-queue links live immediately after the set header */
	sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
	wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));

continuation:

	if (error) {
		/*
		 * We have already cleaned up any state we established,
		 * either locally or as a result of selcount().  We don't
		 * need to wait_subqueue_unlink_all(), since we haven't set
		 * anything at this point.
		 */
		return (error);
	}

	return selprocess(0, SEL_FIRSTPASS);
}

/* Continuation entry point: re-enter selprocess() after tsleep1() resumes us */
int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}


/*
 * selprocess
 *
 * Scan/block loop shared by select_nocancel() (first pass) and
 * selcontinue() (second pass, after a continuation-based sleep).
 *
 * Parameters:	error			The error code from our caller
 *		sel_pass		The pass we are on
 */
int
selprocess(int error, int sel_pass)
{
	int ncoll;
	u_int ni, nw;
	thread_t th_act;
	struct uthread *uth;
	struct proc *p;
	struct select_args *uap;
	int *retval;
	struct _select *sel;
	int unwind = 1;
	int prepost = 0;
	int somewakeup = 0;
	int doretry = 0;
	wait_result_t wait_result;

	/* all state is recovered from the uthread: this may run as a continuation */
	p = current_proc();
	th_act = current_thread();
	uap = (struct select_args *)get_bsduthreadarg(th_act);
	retval = (int *)get_bsduthreadrval(th_act);
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;

	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
		unwind = 0;
	if (sel->count == 0)
		unwind = 0;
retry:
	if (error != 0) {
		sel_pass = SEL_FIRSTPASS;	/* Reset for seldrop */
		goto done;
	}

	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, &p->p_flag);
	/* skip scans if the select is just for timeouts */
	if (sel->count) {
		/*
		 * Clear out any dangling refs from prior calls; technically
		 * there should not be any.
		 */
		if (sel_pass == SEL_FIRSTPASS)
			wait_queue_sub_clearrefs(uth->uu_wqset);

		error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
		if (error || *retval) {
			goto done;
		}
		if (prepost) {
			/* if the select was woken by a prepost, someone else may
			 * already have consumed the data; go scan again if time
			 * permits
			 */
			prepost = 0;
			doretry = 1;
		}
		if (somewakeup) {
			somewakeup = 0;
			doretry = 1;
		}
	}

	if (uap->tv) {
		uint64_t now;

		clock_get_uptime(&now);
		if (now >= sel->abstime)
			goto done;
	}

	if (doretry) {
		/* cleanup obits and try again */
		doretry = 0;
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	/*
	 * To effect a poll, the timeout argument should be
	 * non-nil, pointing to a zero-valued timeval structure.
	 */
	if (uap->tv && sel->abstime == 0) {
		goto done;
	}

	/* No spurious wakeups due to colls, no need to check for them */
	if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);

	/* if the select is just for timeout skip check */
	if (sel->count && (sel_pass == SEL_SECONDPASS))
		panic("selprocess: 2nd pass assertwaiting");

	/* Wait Queue Subordinate has waitqueue as first element */
	wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
					     NULL, THREAD_ABORTSAFE, sel->abstime);
	if (wait_result != THREAD_AWAKENED) {
		/* there are no preposted events */
		/* continuation-based sleep: on wakeup, control resumes in selcontinue() */
		error = tsleep1(NULL, PSOCK | PCATCH,
				"select", 0, selcontinue);
	} else {
		prepost = 1;
		error = 0;
	}

	if (error == 0) {
		sel_pass = SEL_SECONDPASS;
		if (!prepost)
			somewakeup = 1;
		goto retry;
	}
done:
	if (unwind) {
		wait_subqueue_unlink_all(uth->uu_wqset);
		seldrop(p, sel->ibits, uap->nd);
	}
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/* copy the output bit vectors back to user space on success */
#define	putbits(name, x) \
	do { \
		if (uap->name && (error2 = \
			copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
			error = error2; \
	} while (0)

	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	return(error);
}


/*
 * selscan
 *
 * Parameters:	p			Process performing the select
 *		sel			The per-thread select context structure
 *		nfd			The number of file descriptors to scan
 *		retval			The per thread system call return area
 *		sel_pass		Which pass this is; allowed values are
 *						SEL_FIRSTPASS and SEL_SECONDPASS
 *		wqsub			The per thread wait queue set
 *
 * Returns:	0			Success
 *		EIO			Invalid p->p_fd field XXX Obsolete?
 *		EBADF			One of the files in the bit vector is
 *						invalid.
 */
static int
selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
	int sel_pass, wait_queue_sub_t wqsub)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;		/* count of bits */
	int nc = 0;		/* bit vector offset (nc'th bit) */
	static int flag[3] = { FREAD, FWRITE, 0 };	/* per-set access mode: in, ou, ex */
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	char * wql;
	char * wql_ptr;
	int count;
	struct vfs_context context = *vfs_context_current();

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*retval=0;
		return(EIO);
	}
	ibits = sel->ibits;
	obits = sel->obits;
	wql = sel->wql;

	nw = howmany(nfd, NFDBITS);

	count = sel->count;

	nc = 0;
	if (count) {
		proc_fdlock(p);
		/* walk the three bit vectors (read, write, except) */
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];

			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];

				/* visit each set bit in this word */
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];

					if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						/*
						 * If we abort because of a bad
						 * fd, let the caller unwind...
						 */
						proc_fdunlock(p);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS) {
						/* second pass: no wait queue link; undo our FP_INSELECT marking */
						wql_ptr = (char *)0;
						if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) {
							fp->f_flags &= ~FP_INSELECT;
							fp->f_waddr = (void *)0;
						}
					} else {
						/* first pass: hand the fd its preallocated wait queue link */
						wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
						if (fp->f_flags & FP_INSELECT) {
							/* someone is already in select on this fp */
							fp->f_flags |= FP_SELCONFLICT;
							wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
						} else {
							fp->f_flags |= FP_INSELECT;
							fp->f_waddr = (void *)wqsub;
						}
					}

					context.vc_ucred = fp->f_cred;

					/* The select; set the bit, if true */
					if (fp->f_ops
						&& fo_select(fp, flag[msk], wql_ptr, &context)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		proc_fdunlock(p);
	}
	*retval = n;
	return (0);
}

int poll_callback(struct kqueue *, struct kevent64_s *, void *);

/* continuation context handed to kqueue_scan() by poll_nocancel() */
struct poll_continue_args {
	user_addr_t pca_fds;	/* user address of the pollfd array */
	u_int pca_nfds;		/* number of pollfd entries */
	u_int pca_rfds;		/* count of fds with non-zero revents */
};

/*
 * poll system call entry point; cancellation point wrapper around
 * poll_nocancel().
 */
int
poll(struct proc *p, struct poll_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
}


/*
 * poll_nocancel
 *
 * Implements poll() on top of kqueue: each pollfd entry is converted into
 * one or more kevents registered on a temporary kqueue, which is then
 * scanned (and possibly slept on) via kqueue_scan(), with poll_callback()
 * translating triggered kevents back into revents bits.
 */
int
poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
{
	struct poll_continue_args *cont;
	struct pollfd *fds;
	struct kqueue *kq;
	struct timeval atv;
	int ncoll, error = 0;
	u_int nfds = uap->nfds;
	u_int rfds = 0;
	u_int i;
	size_t ni;

	/*
	 * This is kinda bogus.  We have fd limits, but that is not
	 * really related to the size of the pollfd array.  Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits.  We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	if (nfds > OPEN_MAX ||
	    (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
		return (EINVAL);

	kq = kqueue_alloc(p);
	if (kq == NULL)
		return (EAGAIN);

	/* single allocation holds the continue args plus the pollfd copy;
	 * nfds is bounded by OPEN_MAX above, so this cannot overflow */
	ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
	MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
	if (NULL == cont) {
		error = EAGAIN;
		goto out;
	}

	fds = (struct pollfd *)&cont[1];
	error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
	if (error)
		goto out;

	if (uap->timeout != -1) {
		struct timeval rtv;

		/* timeout is in milliseconds; convert to an absolute deadline */
		atv.tv_sec = uap->timeout / 1000;
		atv.tv_usec = (uap->timeout % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto out;
		}
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		/* -1 means block indefinitely */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}

	/* JMM - all this P_SELECT stuff is bogus */
	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, &p->p_flag);
	for (i = 0; i < nfds; i++) {
		short events = fds[i].events;
		struct kevent64_s kev;
		int kerror = 0;

		/* per spec, ignore fd values below zero */
		if (fds[i].fd < 0) {
			fds[i].revents = 0;
			continue;
		}

		/* convert the poll event into a kqueue kevent */
		kev.ident = fds[i].fd;
		kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
		kev.udata = CAST_USER_ADDR_T(&fds[i]);	/* so the callback can find the pollfd */
		kev.fflags = 0;
		kev.data = 0;
		kev.ext[0] = 0;
		kev.ext[1] = 0;

		/* Handle input events */
		if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
			kev.filter = EVFILT_READ;
			if (!(events & ( POLLIN | POLLRDNORM )))
				kev.flags |= EV_OOBAND;
			kerror = kevent_register(kq, &kev, p);
		}

		/* Handle output events */
		if (kerror == 0 &&
		    events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
			kev.filter = EVFILT_WRITE;
			kerror = kevent_register(kq, &kev, p);
		}

		/* Handle BSD extension vnode events */
		if (kerror == 0 &&
		    events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
			kev.filter = EVFILT_VNODE;
			kev.fflags = 0;
			if (events & POLLEXTEND)
				kev.fflags |= NOTE_EXTEND;
			if (events & POLLATTRIB)
				kev.fflags |= NOTE_ATTRIB;
			if (events & POLLNLINK)
				kev.fflags |= NOTE_LINK;
			if (events & POLLWRITE)
				kev.fflags |= NOTE_WRITE;
			kerror = kevent_register(kq, &kev, p);
		}

		if (kerror != 0) {
			fds[i].revents = POLLNVAL;
			rfds++;
		} else
			fds[i].revents = 0;
	}

	/* Did we have any trouble registering? */
	if (rfds > 0)
		goto done;

	/* scan for, and possibly wait for, the kevents to trigger */
	cont->pca_fds = uap->fds;
	cont->pca_nfds = nfds;
	cont->pca_rfds = rfds;
	error = kqueue_scan(kq, poll_callback, NULL, cont, &atv, p);
	rfds = cont->pca_rfds;

 done:
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
		*retval = rfds;
	}

 out:
	if (NULL != cont)
		FREE(cont, M_TEMP);

	kqueue_dealloc(kq);
	return (error);
}

/*
 * poll_callback
 *
 * kqueue_scan() callback for poll: translate one triggered kevent back
 * into revents bits on the pollfd it was registered for (located via
 * kevp->udata), and count the fd as ready the first time any of its
 * revents bits get set.
 */
int
poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
{
	struct poll_continue_args *cont = (struct poll_continue_args *)data;
	struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
	short prev_revents = fds->revents;
	short mask;

	/* convert the results back into revents */
	if (kevp->flags & EV_EOF)
		fds->revents |= POLLHUP;
	if (kevp->flags & EV_ERROR)
		fds->revents |= POLLERR;

	switch (kevp->filter) {
	case EVFILT_READ:
		if (fds->revents & POLLHUP)
			mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
		else {
			mask = 0;
			if (kevp->data != 0)
				mask |= (POLLIN | POLLRDNORM );
			if (kevp->flags & EV_OOBAND)
				mask |= ( POLLPRI | POLLRDBAND );
		}
		/* only report events the caller actually asked for */
		fds->revents |= (fds->events & mask);
		break;

	case EVFILT_WRITE:
		if (!(fds->revents & POLLHUP))
			fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
		break;

	case EVFILT_VNODE:
		if (kevp->fflags & NOTE_EXTEND)
			fds->revents |= (fds->events & POLLEXTEND);
		if (kevp->fflags & NOTE_ATTRIB)
			fds->revents |= (fds->events & POLLATTRIB);
		if (kevp->fflags & NOTE_LINK)
			fds->revents |= (fds->events & POLLNLINK);
		if (kevp->fflags & NOTE_WRITE)
			fds->revents |= (fds->events & POLLWRITE);
		break;
	}

	/* first event on this fd bumps the ready count */
	if (fds->revents != 0 && prev_revents == 0)
		cont->pca_rfds++;

	return 0;
}

/*
 * seltrue
 *
 * Generic select routine for devices that are always ready.
 */
int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{

	return (1);
}

/*
 * selcount
 *
 *
 Count the number of bits set in the input bit vector, and establish an
 * outstanding fp->f_iocount for each of the descriptors which will be in
 * use in the select operation.
 *
 * Parameters:	p			The process doing the select
 *		ibits			The input bit vector
 *		nfd			The number of fd's in the vector
 *		countp			Pointer to where to store the bit count
 *
 * Returns:	0			Success
 *		EIO			Bad per process open file table
 *		EBADF			One of the bits in the input bit vector
 *						references an invalid fd
 *
 * Implicit:	*countp (modified)	Count of fd's
 *
 * Notes:	This function is the first pass under the proc_fdlock() that
 *		permits us to recognize invalid descriptors in the bit vector;
 *		they may, however, not remain valid through the drop and
 *		later reacquisition of the proc_fdlock().
 */
static int
selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	u_int32_t *iptr;
	u_int nw;
	int error=0;
	int dropcount;
	int need_wakeup = 0;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*countp = 0;
		return(EIO);
	}
	nw = howmany(nfd, NFDBITS);

	proc_fdlock(p);
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
					(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						*countp = 0;
						error = EBADF;
						goto bad;
				}
				/* take an I/O reference so the fp can't go away mid-select */
				fp->f_iocount++;
				n++;
			}
		}
	}
	proc_fdunlock(p);

	*countp = n;
	return (0);

bad:
	dropcount = 0;

	/* nothing to undo if we hadn't taken any references yet */
	if (n== 0)
		goto out;
	/* Ignore error return; it's already EBADF */
	(void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);

out:
	proc_fdunlock(p);
	if (need_wakeup) {
		wakeup(&p->p_fpdrainwait);
	}
	return(error);
}


/*
 * seldrop_locked
 *
 * Drop outstanding wait queue references set up during selscan(); drop the
 * outstanding per fileproc f_iocount() picked up during the selcount().
 *
 * Parameters:	p			Process performing the select
 *		ibits			Input bit vector of fd's
 *		nfd			Number of fd's
 *		lim			Limit to number of vector entries to
 *						consider, or -1 for "all"
 *		fromselcount		True if called from selcount() error
 *						unwind (limits drops to 'lim')
 *		need_wakeup		Pointer to flag to set to do a wakeup
 *						if f_iocount on any descriptor goes to 0
 *
 * Returns:	0			Success
 *		EBADF			One or more fds in the bit vector
 *						were invalid, but the rest
 *						were successfully dropped
 *
 * Notes:	An fd may become bad while the proc_fdlock() is not held,
 *		if a multithreaded application closes the fd out from under
 *		the in progress select.  In this case, we still have to
 *		clean up after the set up on the remaining fds.
 */
static int
seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	u_int32_t *iptr;
	u_int nw;
	int error = 0;
	int dropcount = 0;
	uthread_t uth = get_bsdthread_info(current_thread());

	*need_wakeup = 0;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		return(EIO);
	}

	nw = howmany(nfd, NFDBITS);

	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				/*
				 * If we've already dropped as many as were
				 * counted/scanned, then we are done.
				 */
				if ((fromselcount != 0) && (++dropcount > lim))
					goto done;

				if (fp == NULL) {
					/* skip (now) bad fds */
					error = EBADF;
					continue;
				}
				/*
				 * Only clear the flag if we set it.  We'll
				 * only find that we set it if we had made
				 * at least one [partial] pass through selscan().
				 */
				if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) {
					fp->f_flags &= ~FP_INSELECT;
					fp->f_waddr = (void *)0;
				}

				fp->f_iocount--;
				if (fp->f_iocount < 0)
					panic("f_iocount overdecrement!");

				if (fp->f_iocount == 0) {
					/*
					 * The last iocount is responsible for clearing
					 * the selconflict flag - even if we didn't set it -
					 * and is also responsible for waking up anyone
					 * waiting on iocounts to drain.
					 */
					if (fp->f_flags & FP_SELCONFLICT)
						fp->f_flags &= ~FP_SELCONFLICT;
					if (p->p_fpdrainwait) {
						p->p_fpdrainwait = 0;
						*need_wakeup = 1;
					}
				}
			}
		}
	}
done:
	return (error);
}


/*
 * seldrop
 *
 * Convenience wrapper: take the proc_fdlock, drop all select references
 * via seldrop_locked(), and issue any required drain wakeup.
 */
static int
seldrop(struct proc *p, u_int32_t *ibits, int nfd)
{
	int error;
	int need_wakeup = 0;

	proc_fdlock(p);
	error = seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
	proc_fdunlock(p);
	if (need_wakeup) {
		wakeup(&p->p_fpdrainwait);
	}
	return (error);
}

/*
 * Record a select request.
 */
void
selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
{
	thread_t	cur_act = current_thread();
	struct uthread * ut = get_bsdthread_info(cur_act);

	/* need to look at collisions */

	/* do not record if this is second pass of select */
	if(p_wql == (void *)0) {
		return;
	}

	/* lazily initialize the selinfo's wait queue on first use */
	if ((sip->si_flags & SI_INITED) == 0) {
		wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
		sip->si_flags |= SI_INITED;
		sip->si_flags &= ~SI_CLEAR;
	}

	/* a second recorder on the same selinfo is a collision */
	if (sip->si_flags & SI_RECORDED) {
		sip->si_flags |= SI_COLL;
	} else
		sip->si_flags &= ~SI_COLL;

	sip->si_flags |= SI_RECORDED;
	/* link this thread's wait queue set to the selinfo, using the
	 * caller-provided preallocated link */
	if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
		wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
					(wait_queue_link_t)p_wql);

	return;
}

/*
 * selwakeup
 *
 * Wake all threads currently selecting on this selinfo.
 */
void
selwakeup(struct selinfo *sip)
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
#if 0
		/* will not support */
		//wakeup((caddr_t)&selwait);
#endif
	}

	if (sip->si_flags & SI_RECORDED) {
		wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED);
		sip->si_flags &= ~SI_RECORDED;
	}

}

/*
 * selthreadclear
 *
 * Tear down a selinfo: wake any recorded waiters and unlink all wait
 * queue sets from it (called when the underlying object goes away).
 */
void
selthreadclear(struct selinfo *sip)
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	wait_queue_unlink_all(&sip->si_wait_queue);
}




/* KERNEL_DEBUG trace subcodes for the watchevent/waitevent machinery */
#define DBG_POST	0x10
#define DBG_WATCH	0x11
#define DBG_WAIT	0x12
#define DBG_MOD		0x13
#define DBG_EWAKEUP	0x14
#define DBG_ENQUEUE	0x15
#define DBG_DEQUEUE	0x16

#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)


/* remove an event from its proc's pending list, if queued (takes proc lock) */
#define EVPROCDEQUE(p, evq)	do {				\
	proc_lock(p);						\
	if (evq->ee_flags & EV_QUEUED) {			\
		TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);	\
		evq->ee_flags &= ~EV_QUEUED;			\
	}							\
	proc_unlock(p);						\
} while (0);


/*
 * called upon socket close. deque and free all events for
 * the socket... socket must be locked by caller.
 */
void
evsofree(struct socket *sp)
{
	struct eventqelt *evq, *next;
	proc_t 	p;

	if (sp == NULL)
		return;

	for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
		next = evq->ee_slist.tqe_next;
		p = evq->ee_proc;

		if (evq->ee_flags & EV_QUEUED) {
			EVPROCDEQUE(p, evq);
		}
		TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
		FREE(evq, M_TEMP);
	}
}


/*
 * called upon pipe close. deque and free all events for
 * the pipe...
 pipe must be locked by caller
 */
void
evpipefree(struct pipe *cpipe)
{
	struct eventqelt *evq, *next;
	proc_t 	p;

	for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
		next = evq->ee_slist.tqe_next;
		p = evq->ee_proc;

		EVPROCDEQUE(p, evq);

		TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
		FREE(evq, M_TEMP);
	}
}


/*
 * enqueue this event if it's not already queued. wakeup
 * the proc if we do queue this event to it...
 * entered with proc lock held... we drop it before
 * doing the wakeup and return in that state
 */
static void
evprocenque(struct eventqelt *evq)
{
	proc_t	p;

	assert(evq);
	p = evq->ee_proc;

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);

	proc_lock(p);

	if (evq->ee_flags & EV_QUEUED) {
		/* already pending for this proc; nothing to do */
		proc_unlock(p);

		KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
		return;
	}
	evq->ee_flags |= EV_QUEUED;

	TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);

	proc_unlock(p);

	/* wake any waitevent() sleeper on the proc's event list */
	wakeup(&p->p_evlist);

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
}


/*
 * pipe lock must be taken by the caller
 */
void
postpipeevent(struct pipe *pipep, int event)
{
	int	mask;
	struct eventqelt *evq;

	if (pipep == NULL)
		return;
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);

	for (evq = pipep->pipe_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		/* disarmed events are skipped until re-armed via modwatch */
		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {

		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
				mask |= EV_RE;
				evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
			}
			if ((evq->ee_eventmask & EV_WR) &&
			    (MAX(pipep->pipe_buffer.size,PIPE_SIZE) - pipep->pipe_buffer.cnt) >= PIPE_BUF) {

				if (pipep->pipe_state & PIPE_EOF) {
					mask |= EV_WR|EV_RESET;
					break;
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = MAX(pipep->pipe_buffer.size, PIPE_SIZE) - pipep->pipe_buffer.cnt;
			}
			break;

		case EV_WCLOSED:
		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		default:
			return;
		}
		if (mask) {
			/*
			 * disarm... postevents are nops until this event is 'read' via
			 * waitevent and then re-armed via modwatch
			 */
			evq->ee_eventmask = 0;

			/*
			 * since events are disarmed until after the waitevent
			 * the ee_req.er_xxxx fields can't change once we've
			 * inserted this event into the proc queue...
			 * therefore, the waitevent will see a 'consistent'
			 * snapshot of the event, even though it won't hold
			 * the pipe lock, and we're updating the event outside
			 * of the proc lock, which it will hold
			 */
			evq->ee_req.er_eventbits |= mask;

			KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);

			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
}

#if SOCKETS
/*
 * given either a sockbuf or a socket run down the
 * event list and queue ready events found...
 * the socket must be locked by the caller
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int	mask;
	struct	eventqelt *evq;
	struct	tcpcb *tp;

	if (sb)
		sp = sb->sb_so;
	if (sp == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		/* disarmed events are skipped until re-armed via modwatch */
		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark
		*/

		switch (event & EV_DMASK) {

		case EV_OOB:
			if ((evq->ee_eventmask & EV_EX)) {
				if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
					mask |= EV_EX|EV_OOB;
			}
			break;

		case EV_RWBYTES|EV_OOB:
			if ((evq->ee_eventmask & EV_EX)) {
				if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
					mask |= EV_EX|EV_OOB;
			}
			/*
			 * fall into the next case
			 */
		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
				/* a pending error on a closed TCP stream is a reset */
				if (sp->so_error) {
					if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_RE|EV_RESET;
							break;
						}
					}
				}
				mask |= EV_RE;
				evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;

				if (sp->so_state & SS_CANTRCVMORE) {
					mask |= EV_FIN;
					break;
				}
			}
			if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
				if (sp->so_error) {
					if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_WR|EV_RESET;
							break;
						}
					}
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
			}
			break;

		case EV_RCONN:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCONN;
				evq->ee_req.er_rcnt = sp->so_qlen + 1;  // incl this one
			}
			break;

		case EV_WCONN:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
			return;
		} /* switch */

		KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);

		if (mask) {
			/*
			 * disarm... postevents are nops until this event is 'read' via
			 * waitevent and then re-armed via modwatch
			 */
			evq->ee_eventmask = 0;

			/*
			 * since events are disarmed until after the waitevent
			 * the ee_req.er_xxxx fields can't change once we've
			 * inserted this event into the proc queue...
			 * since waitevent can't see this event until we
			 * enqueue it, waitevent will see a 'consistent'
			 * snapshot of the event, even though it won't hold
			 * the socket lock, and we're updating the event outside
			 * of the proc lock, which it will hold
			 */
			evq->ee_req.er_eventbits |= mask;

			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
}
#endif /* SOCKETS */


/*
 * watchevent system call. user passes us an event to watch
 * for. we malloc an event object, initialize it, and queue
 * it to the open socket. when the event occurs, postevent()
 * will enque it back to our proc where we can retrieve it
 * via waitevent().
 *
 * should this prevent duplicate events on same socket?
 *
 * Returns:
 *		ENOMEM			No memory for operation
 *	copyin:EFAULT
 */
int
watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
{
	struct eventqelt *evq = (struct eventqelt *)0;
	struct eventqelt *np = NULL;
	struct eventreq64 *erp;
	struct fileproc *fp = NULL;
	int error;

	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);

	// get a qelt and fill with users req
	MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);

	if (evq == NULL)
		return (ENOMEM);
	erp = &evq->ee_req;

	// get users request pkt

	if (IS_64BIT_PROCESS(p)) {
		error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
	} else {
		struct eventreq32 er32;

		error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
		if (error == 0) {
			/*
			 * the user only passes in the
			 * er_type, er_handle and er_data...
			 * the other fields are initialized
			 * below, so don't bother to copy
			 */
			erp->er_type = er32.er_type;
			erp->er_handle = er32.er_handle;
			erp->er_data = (user_addr_t)er32.er_data;
		}
	}
	if (error) {
		FREE(evq, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);

		return(error);
	}
	KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);

	// validate, freeing qelt if errors
	error = 0;
	proc_fdlock(p);

	if (erp->er_type != EV_FD) {
		error = EINVAL;
	} else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
		error = EBADF;
#if SOCKETS
	} else if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
#endif /* SOCKETS */
	} else if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	} else {
		/* only sockets and pipes can be watched */
		fp_drop(p, erp->er_handle, fp, 1);
		error = EINVAL;
	}
	proc_fdunlock(p);

	if (error) {
		FREE(evq, M_TEMP);

		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}

	/*
	 * only allow one watch per file per proc
	 */
	for ( ; np != NULL; np = np->ee_slist.tqe_next) {
		if (np->ee_proc == p) {
#if SOCKETS
			if (fp->f_type == DTYPE_SOCKET)
				socket_unlock((struct socket *)fp->f_data, 1);
			else
#endif /* SOCKETS */
				PIPE_UNLOCK((struct pipe *)fp->f_data);
			fp_drop(p, erp->er_handle, fp, 0);
			FREE(evq, M_TEMP);

			KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
			return(EINVAL);
		}
	}
	erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
	evq->ee_proc = p;
	evq->ee_eventmask = uap->u_eventmask & EV_MASK;
	evq->ee_flags = 0;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
		postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events

		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
		postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);

		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	fp_drop_event(p, erp->er_handle, fp);

	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
	return(0);
}



/*
 * waitevent system call.
 * grabs the next waiting event for this proc and returns
 * it. if no events, user can request to sleep with timeout
 * or without or poll mode
 *    ((tv != NULL && interval == 0) || tv == -1)
 */
int
waitevent(proc_t p, struct waitevent_args *uap, int *retval)
{
	int error = 0;
	struct eventqelt *evq;
	struct eventreq64 *erp;
	uint64_t abstime, interval;
	boolean_t fast_poll = FALSE;
	union {
		struct eventreq64 er64;
		struct eventreq32 er32;
	} uer;

	interval = 0;

	if (uap->tv) {
		struct timeval atv;
		/*
		 * check for fast poll method
		 */
		if (IS_64BIT_PROCESS(p)) {
			if (uap->tv == (user_addr_t)-1)
				fast_poll = TRUE;
		} else if (uap->tv == (user_addr_t)((uint32_t)-1))
			fast_poll = TRUE;

		if (fast_poll == TRUE) {
			/* tv == -1: non-blocking peek, no timeval copyin needed */
			if (p->p_evlist.tqh_first == NULL) {
				KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
				/*
				 * poll failed
				 */
				*retval = 1;
				return (0);
			}
			proc_lock(p);
			goto retry;
		}
		if (IS_64BIT_PROCESS(p)) {
			struct user64_timeval atv64;
			error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
			/* Loses resolution - assume timeout < 68 years */
			atv.tv_sec = atv64.tv_sec;
			atv.tv_usec = atv64.tv_usec;
		} else {
			struct user32_timeval atv32;
			error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
			atv.tv_sec = atv32.tv_sec;
			atv.tv_usec = atv32.tv_usec;
		}

		if (error)
			return(error);
		if (itimerfix(&atv)) {
			error = EINVAL;
			return(error);
		}
		interval = tvtoabstime(&atv);
	}
	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);

	proc_lock(p);
retry:
	if ((evq = p->p_evlist.tqh_first) != NULL) {
		/*
		 * found one... make a local copy while it's still on the queue
		 * to prevent it from changing while in the midst of copying
		 * don't want to hold the proc lock across a copyout because
		 * it might block on a page fault at the target in user space
		 */
		erp = &evq->ee_req;

		if (IS_64BIT_PROCESS(p))
			bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
		else {
			uer.er32.er_type = erp->er_type;
			uer.er32.er_handle = erp->er_handle;
			uer.er32.er_data = (uint32_t)erp->er_data;
			uer.er32.er_ecnt = erp->er_ecnt;
			uer.er32.er_rcnt = erp->er_rcnt;
			uer.er32.er_wcnt = erp->er_wcnt;
			uer.er32.er_eventbits = erp->er_eventbits;
		}
		TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);

		evq->ee_flags &= ~EV_QUEUED;

		proc_unlock(p);

		if (IS_64BIT_PROCESS(p))
			error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
		else
			error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));

		KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
			     evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
		return (error);
	}
	else {
		if (uap->tv && interval == 0) {
			/* zero timeout: pure poll, don't sleep */
			proc_unlock(p);
			*retval = 1;  // poll failed

			KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		if (interval != 0)
			clock_absolutetime_interval_to_deadline(interval, &abstime);
		else
			abstime = 0;

		KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);

		error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);

		KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);

		if (error == 0)
			goto retry;
		if (error == ERESTART)
			error = EINTR;
		if (error == EWOULDBLOCK) {
			/* timeout expired with nothing pending */
			*retval = 1;
			error = 0;
		}
	}
	proc_unlock(p);

	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
	return (error);
}


/*
 * modwatch system call. user passes in event to modify.
 * if we find it we reset the event bits and que/deque event
 * it needed.
 */
int
modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
{
	struct eventreq64 er;
	struct eventreq64 *erp = &er;
	struct eventqelt *evq = NULL;	/* protected by error return */
	int error;
	struct fileproc *fp;
	int flag;

	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);

	/*
	 * get user's request pkt
	 * just need the er_type and er_handle which sit above the
	 * problematic er_data (32/64 issue)...
so only copy in 2565 * those 2 fields 2566 */ 2567 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) { 2568 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0); 2569 return(error); 2570 } 2571 proc_fdlock(p); 2572 2573 if (erp->er_type != EV_FD) { 2574 error = EINVAL; 2575 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) { 2576 error = EBADF; 2577#if SOCKETS 2578 } else if (fp->f_type == DTYPE_SOCKET) { 2579 socket_lock((struct socket *)fp->f_data, 1); 2580 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first; 2581#endif /* SOCKETS */ 2582 } else if (fp->f_type == DTYPE_PIPE) { 2583 PIPE_LOCK((struct pipe *)fp->f_data); 2584 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first; 2585 } else { 2586 fp_drop(p, erp->er_handle, fp, 1); 2587 error = EINVAL; 2588 } 2589 2590 if (error) { 2591 proc_fdunlock(p); 2592 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0); 2593 return(error); 2594 } 2595 2596 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) { 2597 fp->f_flags &= ~FP_WAITEVENT; 2598 } 2599 proc_fdunlock(p); 2600 2601 // locate event if possible 2602 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) { 2603 if (evq->ee_proc == p) 2604 break; 2605 } 2606 if (evq == NULL) { 2607#if SOCKETS 2608 if (fp->f_type == DTYPE_SOCKET) 2609 socket_unlock((struct socket *)fp->f_data, 1); 2610 else 2611#endif /* SOCKETS */ 2612 PIPE_UNLOCK((struct pipe *)fp->f_data); 2613 fp_drop(p, erp->er_handle, fp, 0); 2614 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0); 2615 return(EINVAL); 2616 } 2617 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0); 2618 2619 if (uap->u_eventmask == EV_RM) { 2620 EVPROCDEQUE(p, evq); 2621 2622#if SOCKETS 2623 if (fp->f_type == DTYPE_SOCKET) { 2624 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist); 2625 socket_unlock((struct socket *)fp->f_data, 1); 2626 } else 2627#endif /* SOCKETS */ 2628 { 2629 
TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist); 2630 PIPE_UNLOCK((struct pipe *)fp->f_data); 2631 } 2632 fp_drop(p, erp->er_handle, fp, 0); 2633 FREE(evq, M_TEMP); 2634 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0); 2635 return(0); 2636 } 2637 switch (uap->u_eventmask & EV_MASK) { 2638 2639 case 0: 2640 flag = 0; 2641 break; 2642 2643 case EV_RE: 2644 case EV_WR: 2645 case EV_RE|EV_WR: 2646 flag = EV_RWBYTES; 2647 break; 2648 2649 case EV_EX: 2650 flag = EV_OOB; 2651 break; 2652 2653 case EV_EX|EV_RE: 2654 case EV_EX|EV_WR: 2655 case EV_EX|EV_RE|EV_WR: 2656 flag = EV_OOB|EV_RWBYTES; 2657 break; 2658 2659 default: 2660#if SOCKETS 2661 if (fp->f_type == DTYPE_SOCKET) 2662 socket_unlock((struct socket *)fp->f_data, 1); 2663 else 2664#endif /* SOCKETS */ 2665 PIPE_UNLOCK((struct pipe *)fp->f_data); 2666 fp_drop(p, erp->er_handle, fp, 0); 2667 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0); 2668 return(EINVAL); 2669 } 2670 /* 2671 * since we're holding the socket/pipe lock, the event 2672 * cannot go from the unqueued state to the queued state 2673 * however, it can go from the queued state to the unqueued state 2674 * since that direction is protected by the proc_lock... 2675 * so do a quick check for EV_QUEUED w/o holding the proc lock 2676 * since by far the common case will be NOT EV_QUEUED, this saves 2677 * us taking the proc_lock the majority of the time 2678 */ 2679 if (evq->ee_flags & EV_QUEUED) { 2680 /* 2681 * EVPROCDEQUE will recheck the state after it grabs the proc_lock 2682 */ 2683 EVPROCDEQUE(p, evq); 2684 } 2685 /* 2686 * while the event is off the proc queue and 2687 * we're holding the socket/pipe lock 2688 * it's safe to update these fields... 
	 */
	evq->ee_req.er_eventbits = 0;
	evq->ee_eventmask = uap->u_eventmask & EV_MASK;

	/* repost against the new mask so already-pending state is re-reported */
#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		postevent((struct socket *)fp->f_data, 0, flag);
		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		postpipeevent((struct pipe *)fp->f_data, flag);
		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	fp_drop(p, erp->er_handle, fp, 0);
	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
	return(0);
}

/* this routine is called from the close of fd with proc_fdlock held */
/*
 * Tear down any watch this process registered on fp's socket/pipe:
 * clear FP_WAITEVENT, unlink the event element from both the object's
 * event list and the proc's pending queue, and free it.
 * Enters and exits with proc_fdlock held; the lock is dropped around
 * the socket/pipe work and retaken before returning on success paths.
 * Returns EINVAL if fp is not a socket/pipe or no watch is found.
 */
int
waitevent_close(struct proc *p, struct fileproc *fp)
{
	struct eventqelt *evq;


	fp->f_flags &= ~FP_WAITEVENT;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
	} else
#endif /* SOCKETS */
	if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	}
	else {
		/* not a watchable type: proc_fdlock is still held, as the caller expects */
		return(EINVAL);
	}
	proc_fdunlock(p);


	// locate event if possible
	for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
		if (evq->ee_proc == p)
			break;
	}
	if (evq == NULL) {
#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET)
			socket_unlock((struct socket *)fp->f_data, 1);
		else
#endif /* SOCKETS */
			PIPE_UNLOCK((struct pipe *)fp->f_data);

		/* retake proc_fdlock before returning to the close path */
		proc_fdlock(p);

		return(EINVAL);
	}
	/* pull it off the proc's pending-event queue first */
	EVPROCDEQUE(p, evq);

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	FREE(evq, M_TEMP);

	proc_fdlock(p);

	return(0);
}


/*
 * gethostuuid
 *
 * Description:	Get the host UUID from IOKit and return it to user space.
 *
 * Parameters:	uuid_buf		Pointer to buffer to receive UUID
 *		timeout			Timespec for timeout
 *
 * Returns:	0			Success
 *		EWOULDBLOCK		Timeout is too short
 *		copyout:EFAULT		Bad user buffer
 *
 * Notes:	A timeout seems redundant, since if it's tolerable to not
 *		have a system UUID in hand, then why ask for one?
 */
int
gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
{
	kern_return_t kret;
	int error;
	mach_timespec_t mach_ts;	/* for IOKit call */
	__darwin_uuid_t uuid_kern;	/* for IOKit call */

	/* Convert the 32/64 bit timespec into a mach_timespec_t */
	if ( proc_is64bit(p) ) {
		struct user64_timespec ts;
		error = copyin(uap->timeoutp, &ts, sizeof(ts));
		if (error)
			return (error);
		mach_ts.tv_sec = ts.tv_sec;
		mach_ts.tv_nsec = ts.tv_nsec;
	} else {
		struct user32_timespec ts;
		error = copyin(uap->timeoutp, &ts, sizeof(ts) );
		if (error)
			return (error);
		mach_ts.tv_sec = ts.tv_sec;
		mach_ts.tv_nsec = ts.tv_nsec;
	}

	/* Call IOKit with the stack buffer to get the UUID */
	kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);

	/*
	 * If we get it, copy out the data to the user buffer; note that a
	 * uuid_t is an array of characters, so this is size invariant for
	 * 32 vs. 64 bit.
2817 */ 2818 if (kret == KERN_SUCCESS) { 2819 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern)); 2820 } else { 2821 error = EWOULDBLOCK; 2822 } 2823 2824 return (error); 2825} 2826 2827/* 2828 * ledger 2829 * 2830 * Description: Omnibus system call for ledger operations 2831 */ 2832int 2833ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) 2834{ 2835 int rval, pid, len, error; 2836#ifdef LEDGER_DEBUG 2837 struct ledger_limit_args lla; 2838#endif 2839 task_t task; 2840 proc_t proc; 2841 2842 /* Finish copying in the necessary args before taking the proc lock */ 2843 error = 0; 2844 len = 0; 2845 if (args->cmd == LEDGER_ENTRY_INFO) 2846 error = copyin(args->arg3, (char *)&len, sizeof (len)); 2847 else if (args->cmd == LEDGER_TEMPLATE_INFO) 2848 error = copyin(args->arg2, (char *)&len, sizeof (len)); 2849#ifdef LEDGER_DEBUG 2850 else if (args->cmd == LEDGER_LIMIT) 2851 error = copyin(args->arg2, (char *)&lla, sizeof (lla)); 2852#endif 2853 if (error) 2854 return (error); 2855 if (len < 0) 2856 return (EINVAL); 2857 2858 rval = 0; 2859 if (args->cmd != LEDGER_TEMPLATE_INFO) { 2860 pid = args->arg1; 2861 proc = proc_find(pid); 2862 if (proc == NULL) 2863 return (ESRCH); 2864 2865#if CONFIG_MACF 2866 error = mac_proc_check_ledger(p, proc, args->cmd); 2867 if (error) { 2868 proc_rele(proc); 2869 return (error); 2870 } 2871#endif 2872 2873 task = proc->task; 2874 } 2875 2876 switch (args->cmd) { 2877#ifdef LEDGER_DEBUG 2878 case LEDGER_LIMIT: { 2879 if (!is_suser()) 2880 rval = EPERM; 2881 rval = ledger_limit(task, &lla); 2882 proc_rele(proc); 2883 break; 2884 } 2885#endif 2886 case LEDGER_INFO: { 2887 struct ledger_info info; 2888 2889 rval = ledger_info(task, &info); 2890 proc_rele(proc); 2891 if (rval == 0) 2892 rval = copyout(&info, args->arg2, 2893 sizeof (info)); 2894 break; 2895 } 2896 2897 case LEDGER_ENTRY_INFO: { 2898 void *buf; 2899 int sz; 2900 2901 rval = ledger_entry_info(task, &buf, &len); 2902 proc_rele(proc); 2903 if 
((rval == 0) && (len > 0)) { 2904 sz = len * sizeof (struct ledger_entry_info); 2905 rval = copyout(buf, args->arg2, sz); 2906 kfree(buf, sz); 2907 } 2908 if (rval == 0) 2909 rval = copyout(&len, args->arg3, sizeof (len)); 2910 break; 2911 } 2912 2913 case LEDGER_TEMPLATE_INFO: { 2914 void *buf; 2915 int sz; 2916 2917 rval = ledger_template_info(&buf, &len); 2918 if ((rval == 0) && (len > 0)) { 2919 sz = len * sizeof (struct ledger_template_info); 2920 rval = copyout(buf, args->arg1, sz); 2921 kfree(buf, sz); 2922 } 2923 if (rval == 0) 2924 rval = copyout(&len, args->arg2, sizeof (len)); 2925 break; 2926 } 2927 2928 default: 2929 rval = EINVAL; 2930 } 2931 2932 return (rval); 2933} 2934