1/*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 *    must display the following acknowledgement:
48 *	This product includes software developed by the University of
49 *	California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 *    may be used to endorse or promote products derived from this software
52 *    without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
67 */
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections.  This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
79#include <sys/vnode_internal.h>
80#include <sys/proc_internal.h>
81#include <sys/kauth.h>
82#include <sys/file_internal.h>
83#include <sys/socket.h>
84#include <sys/socketvar.h>
85#include <sys/stat.h>
86#include <sys/ioctl.h>
87#include <sys/fcntl.h>
88#include <sys/malloc.h>
89#include <sys/mman.h>
90#include <sys/syslog.h>
91#include <sys/unistd.h>
92#include <sys/resourcevar.h>
93#include <sys/aio_kern.h>
94#include <sys/ev.h>
95#include <kern/lock.h>
96#include <sys/uio_internal.h>
97
98#include <security/audit/audit.h>
99
100#include <sys/mount_internal.h>
101#include <sys/kdebug.h>
102#include <sys/sysproto.h>
103#include <sys/pipe.h>
104#include <sys/spawn.h>
105#include <kern/kern_types.h>
106#include <kern/kalloc.h>
107#include <libkern/OSAtomic.h>
108
109#include <sys/ubc_internal.h>
110
111#include <kern/ipc_misc.h>
112#include <vm/vm_protos.h>
113
114#include <mach/mach_port.h>
115
116#if CONFIG_PROTECT
117#include <sys/cprotect.h>
118#endif
119#include <hfs/hfs.h>
120
121kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
122    mach_msg_type_name_t, ipc_port_t *);
123void ipc_port_release_send(ipc_port_t);
124
125struct psemnode;
126struct pshmnode;
127
128static int finishdup(proc_t p,
129    struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
130
131int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
132void fg_drop(struct fileproc * fp);
133void fg_free(struct fileglob *fg);
134void fg_ref(struct fileproc * fp);
135void fileport_releasefg(struct fileglob *fg);
136
137/* flags for close_internal_locked */
138#define FD_DUP2RESV 1
139static int close_internal_locked(struct proc *p, int fd, struct fileproc *fp, int flags);
140
141static int closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx);
142
143/* We don't want these exported */
144__private_extern__
145int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
146
147__private_extern__
148int unlink1(vfs_context_t, struct nameidata *, int);
149
150static void _fdrelse(struct proc * p, int fd);
151
152
153extern void file_lock_init(void) __attribute__((section("__TEXT, initcode")));
154extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat4, proc_t p);
155#if SOCKETS
156extern int soo_stat(struct socket *so, void *ub, int isstat64);
157#endif /* SOCKETS */
158
159extern kauth_scope_t	kauth_scope_fileop;
160
161extern int cs_debug;
162
163/* Conflict wait queue for when selects collide (opaque type) */
164extern struct wait_queue select_conflict_queue;
165
166#define f_flag f_fglob->fg_flag
167#define f_type f_fglob->fg_type
168#define f_msgcount f_fglob->fg_msgcount
169#define f_cred f_fglob->fg_cred
170#define f_ops f_fglob->fg_ops
171#define f_offset f_fglob->fg_offset
172#define f_data f_fglob->fg_data
173/*
174 * Descriptor management.
175 */
176struct fmsglist fmsghead;	/* head of list of open files */
177struct fmsglist fmsg_ithead;	/* head of list of open files */
178int nfiles;			/* actual number of open files */
179
180
181lck_grp_attr_t * file_lck_grp_attr;
182lck_grp_t * file_lck_grp;
183lck_attr_t * file_lck_attr;
184
185lck_mtx_t * uipc_lock;
186
187
188/*
189 * file_lock_init
190 *
 * Description:	Initialize the file lock group, the file lock attribute, and
 *		the uipc lock
192 *
193 * Parameters:	(void)
194 *
195 * Returns:	void
196 *
197 * Notes:	Called at system startup from bsd_init().
198 */
199void
200file_lock_init(void)
201{
202	/* allocate file lock group attribute and group */
203	file_lck_grp_attr= lck_grp_attr_alloc_init();
204
205	file_lck_grp = lck_grp_alloc_init("file",  file_lck_grp_attr);
206
207	/* Allocate file lock attribute */
208	file_lck_attr = lck_attr_alloc_init();
209
210	uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
211}
212
213
214/*
215 * proc_fdlock, proc_fdlock_spin
216 *
217 * Description:	Lock to control access to the per process struct fileproc
218 *		and struct filedesc
219 *
220 * Parameters:	p				Process to take the lock on
221 *
222 * Returns:	void
223 *
224 * Notes:	The lock is initialized in forkproc() and destroyed in
225 *		reap_child_process().
226 */
227void
228proc_fdlock(proc_t p)
229{
230	lck_mtx_lock(&p->p_fdmlock);
231}
232
233void
234proc_fdlock_spin(proc_t p)
235{
236	lck_mtx_lock_spin(&p->p_fdmlock);
237}
238
239void
240proc_fdlock_assert(proc_t p, int assertflags)
241{
242	lck_mtx_assert(&p->p_fdmlock, assertflags);
243}
244
245
246/*
247 * proc_fdunlock
248 *
249 * Description:	Unlock the lock previously locked by a call to proc_fdlock()
250 *
251 * Parameters:	p				Process to drop the lock on
252 *
253 * Returns:	void
254 */
255void
256proc_fdunlock(proc_t p)
257{
258	lck_mtx_unlock(&p->p_fdmlock);
259}
260
261
262/*
263 * System calls on descriptors.
264 */
265
266
267/*
268 * getdtablesize
269 *
270 * Description:	Returns the per process maximum size of the descriptor table
271 *
 * Parameters:	p				Process being queried
 *		uap				Unused
 *		retval				Pointer to the call return area
274 *
275 * Returns:	0				Success
276 *
277 * Implicit returns:
278 *		*retval (modified)		Size of dtable
279 */
280int
281getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
282{
283	proc_fdlock_spin(p);
284	*retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
285	proc_fdunlock(p);
286
287	return (0);
288}
289
290
291void
292procfdtbl_reservefd(struct proc * p, int fd)
293{
294	p->p_fd->fd_ofiles[fd] = NULL;
295        p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
296}
297
298void
299procfdtbl_markclosefd(struct proc * p, int fd)
300{
301        p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
302}
303
304void
305procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
306{
307	if (fp != NULL)
308        	p->p_fd->fd_ofiles[fd] = fp;
309        p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
310	if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
311		p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
312		wakeup(&p->p_fd);
313	}
314}
315
316void
317procfdtbl_waitfd(struct proc * p, int fd)
318{
319        p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
320	msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
321}
322
323
324void
325procfdtbl_clearfd(struct proc * p, int fd)
326{
327	int waiting;
328
329	waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
330	p->p_fd->fd_ofiles[fd] = NULL;
331	p->p_fd->fd_ofileflags[fd] = 0;
332	if ( waiting == UF_RESVWAIT) {
333		wakeup(&p->p_fd);
334	}
335}
336
337/*
338 * _fdrelse
339 *
 * Description:	Utility function to free an fd in a process's filedesc
 *
 * Parameters:	p				Process whose filedesc fd lies in
 *		fd				fd to free
 *
 * Returns:	void
 *
 * Locks:	Assumes proc_fdlock for process p is held by
 *		the caller
350 */
351static void
352_fdrelse(struct proc * p, int fd)
353{
354	struct filedesc *fdp = p->p_fd;
355	int nfd = 0;
356
357	if (fd < fdp->fd_freefile)
358		fdp->fd_freefile = fd;
359#if DIAGNOSTIC
360	if (fd > fdp->fd_lastfile)
361 		panic("fdrelse: fd_lastfile inconsistent");
362#endif
363	procfdtbl_clearfd(p, fd);
364
365	while ((nfd = fdp->fd_lastfile) > 0 &&
366			fdp->fd_ofiles[nfd] == NULL &&
367			!(fdp->fd_ofileflags[nfd] & UF_RESERVED))
368		fdp->fd_lastfile--;
369}
370
371
372int
373fd_rdwr(
374	int fd,
375	enum uio_rw rw,
376	uint64_t base,
377	int64_t len,
378	enum uio_seg segflg,
379	off_t	offset,
380	int	io_flg,
381	int64_t *aresid)
382{
383        struct fileproc *fp;
384	proc_t	p;
385        int error = 0;
386	int flags = 0;
387	int spacetype;
388	uio_t auio = NULL;
389	char uio_buf[ UIO_SIZEOF(1) ];
390	struct vfs_context context = *(vfs_context_current());
391
392	p = current_proc();
393
394        error = fp_lookup(p, fd, &fp, 0);
395        if (error)
396                return(error);
397
398	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) {
399		error = EINVAL;
400		goto out;
401	}
402	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
403                error = EBADF;
404		goto out;
405	}
406
407	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
408    		error = EBADF;
409    		goto out;
410	}
411
412	context.vc_ucred = fp->f_fglob->fg_cred;
413
414	if (UIO_SEG_IS_USER_SPACE(segflg))
415		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
416	else
417		spacetype = UIO_SYSSPACE;
418
419	auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
420
421	uio_addiov(auio, base, len);
422
423	if ( !(io_flg & IO_APPEND))
424		flags = FOF_OFFSET;
425
426	if (rw == UIO_WRITE)
427		error = fo_write(fp, auio, flags, &context);
428	else
429		error = fo_read(fp, auio, flags, &context);
430
431	if (aresid)
432		*aresid = uio_resid(auio);
433	else {
434		if (uio_resid(auio) && error == 0)
435			error = EIO;
436	}
437out:
438        if (rw == UIO_WRITE && error == 0)
439                fp_drop_written(p, fd, fp);
440        else
441                fp_drop(p, fd, fp, 0);
442
443	return error;
444}
445
446
447
448/*
449 * dup
450 *
451 * Description:	Duplicate a file descriptor.
452 *
453 * Parameters:	p				Process performing the dup
454 *		uap->fd				The fd to dup
455 *		retval				Pointer to the call return area
456 *
457 * Returns:	0				Success
458 *		!0				Errno
459 *
460 * Implicit returns:
461 *		*retval (modified)		The new descriptor
462 */
463int
464dup(proc_t p, struct dup_args *uap, int32_t *retval)
465{
466	struct filedesc *fdp = p->p_fd;
467	int old = uap->fd;
468	int new, error;
469	struct fileproc *fp;
470
471	proc_fdlock(p);
472	if ( (error = fp_lookup(p, old, &fp, 1)) ) {
473		proc_fdunlock(p);
474		return(error);
475	}
476	if ( (error = fdalloc(p, 0, &new)) ) {
477		fp_drop(p, old, fp, 1);
478		proc_fdunlock(p);
479		return (error);
480	}
481	error = finishdup(p, fdp, old, new, 0, retval);
482	fp_drop(p, old, fp, 1);
483	proc_fdunlock(p);
484
485	return (error);
486}
487
488/*
489 * dup2
490 *
491 * Description:	Duplicate a file descriptor to a particular value.
492 *
493 * Parameters:	p				Process performing the dup
494 *		uap->from			The fd to dup
495 *		uap->to				The fd to dup it to
496 *		retval				Pointer to the call return area
497 *
498 * Returns:	0				Success
499 *		!0				Errno
500 *
501 * Implicit returns:
502 *		*retval (modified)		The new descriptor
503 */
504int
505dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
506{
507	struct filedesc *fdp = p->p_fd;
508	int old = uap->from, new = uap->to;
509	int i, error;
510	struct fileproc *fp, *nfp;
511
512	proc_fdlock(p);
513
514startover:
515	if ( (error = fp_lookup(p, old, &fp, 1)) ) {
516		proc_fdunlock(p);
517		return(error);
518	}
519	if (new < 0 ||
520		(rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
521	    new >= maxfiles) {
522		fp_drop(p, old, fp, 1);
523		proc_fdunlock(p);
524		return (EBADF);
525	}
526	if (old == new) {
527		fp_drop(p, old, fp, 1);
528		*retval = new;
529		proc_fdunlock(p);
530		return (0);
531	}
532	if (new < 0 || new >= fdp->fd_nfiles) {
533		if ( (error = fdalloc(p, new, &i)) ) {
534			fp_drop(p, old, fp, 1);
535			proc_fdunlock(p);
536			return (error);
537		}
538		if (new != i) {
539			fdrelse(p, i);
540			goto closeit;
541		}
542	} else {
543closeit:
544		while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED)  {
545				fp_drop(p, old, fp, 1);
546				procfdtbl_waitfd(p, new);
547#if DIAGNOSTIC
548				proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
549#endif
550				goto startover;
551		}
552
553		if ((fdp->fd_ofiles[new] != NULL) &&
554		    ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
555			fp_drop(p, old, fp, 1);
556			(void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
557#if DIAGNOSTIC
558			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
559#endif
560			procfdtbl_clearfd(p, new);
561			goto startover;
562		} else  {
563#if DIAGNOSTIC
564			if (fdp->fd_ofiles[new] != NULL)
565				panic("dup2: no ref on fileproc %d", new);
566#endif
567			procfdtbl_reservefd(p, new);
568		}
569
570#if DIAGNOSTIC
571		proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
572#endif
573
574	}
575#if DIAGNOSTIC
576	if (fdp->fd_ofiles[new] != 0)
577		panic("dup2: overwriting fd_ofiles with new %d", new);
578	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
579		panic("dup2: unreserved fileflags with new %d", new);
580#endif
581	error = finishdup(p, fdp, old, new, 0, retval);
582	fp_drop(p, old, fp, 1);
583	proc_fdunlock(p);
584
585	return(error);
586}
587
588
589/*
590 * fcntl
591 *
592 * Description:	The file control system call.
593 *
594 * Parameters:	p				Process performing the fcntl
595 *		uap->fd				The fd to operate against
596 *		uap->cmd			The command to perform
597 *		uap->arg			Pointer to the command argument
598 *		retval				Pointer to the call return area
599 *
600 * Returns:	0				Success
601 *		!0				Errno (see fcntl_nocancel)
602 *
603 * Implicit returns:
604 *		*retval (modified)		fcntl return value (if any)
605 *
606 * Notes:	This system call differs from fcntl_nocancel() in that it
607 *		tests for cancellation prior to performing a potentially
608 *		blocking operation.
609 */
610int
611fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
612{
613	__pthread_testcancel(1);
614	return(fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval));
615}
616
617
618/*
619 * fcntl_nocancel
620 *
621 * Description:	A non-cancel-testing file control system call.
622 *
623 * Parameters:	p				Process performing the fcntl
624 *		uap->fd				The fd to operate against
625 *		uap->cmd			The command to perform
626 *		uap->arg			Pointer to the command argument
627 *		retval				Pointer to the call return area
628 *
629 * Returns:	0				Success
630 *		EINVAL
631 *	fp_lookup:EBADF				Bad file descriptor
632 * [F_DUPFD]
633 *	fdalloc:EMFILE
634 *	fdalloc:ENOMEM
635 *	finishdup:EBADF
636 *	finishdup:ENOMEM
637 * [F_SETOWN]
638 *		ESRCH
639 * [F_SETLK]
640 *		EBADF
641 *		EOVERFLOW
642 *	copyin:EFAULT
643 *	vnode_getwithref:???
644 *	VNOP_ADVLOCK:???
645 * [F_GETLK]
646 *		EBADF
647 *		EOVERFLOW
648 *	copyin:EFAULT
649 *	copyout:EFAULT
650 *	vnode_getwithref:???
651 *	VNOP_ADVLOCK:???
652 * [F_PREALLOCATE]
653 *		EBADF
654 *		EINVAL
655 *	copyin:EFAULT
656 *	copyout:EFAULT
657 *	vnode_getwithref:???
658 *	VNOP_ALLOCATE:???
659 * [F_SETSIZE,F_RDADVISE]
660 *		EBADF
661 *	copyin:EFAULT
662 *	vnode_getwithref:???
663 * [F_RDAHEAD,F_NOCACHE]
664 *		EBADF
665 *	vnode_getwithref:???
666 * [???]
667 *
668 * Implicit returns:
669 *		*retval (modified)		fcntl return value (if any)
670 */
671int
672fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
673{
674	int fd = uap->fd;
675	struct filedesc *fdp = p->p_fd;
676	struct fileproc *fp;
677	char *pop;
678	struct vnode *vp = NULLVP;	/* for AUDIT_ARG() at end */
679	int i, tmp, error, error2, flg = F_POSIX;
680	struct flock fl;
681	struct vfs_context context;
682	off_t offset;
683	int newmin;
684	daddr64_t lbn, bn;
685	unsigned int fflag;
686	user_addr_t argp;
687	boolean_t is64bit;
688
689	AUDIT_ARG(fd, uap->fd);
690	AUDIT_ARG(cmd, uap->cmd);
691
692	proc_fdlock(p);
693	if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
694		proc_fdunlock(p);
695		return(error);
696	}
697	context.vc_thread = current_thread();
698	context.vc_ucred = fp->f_cred;
699
700	is64bit = proc_is64bit(p);
701	if (is64bit) {
702		argp = uap->arg;
703	}
704	else {
705		/*
706		 * Since the arg parameter is defined as a long but may be
707		 * either a long or a pointer we must take care to handle
708		 * sign extension issues.  Our sys call munger will sign
709		 * extend a long when we are called from a 32-bit process.
710		 * Since we can never have an address greater than 32-bits
711		 * from a 32-bit process we lop off the top 32-bits to avoid
712		 * getting the wrong address
713		 */
714		argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
715	}
716
717	pop = &fdp->fd_ofileflags[fd];
718
719#if CONFIG_MACF
720	error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
721	    uap->arg);
722	if (error)
723		goto out;
724#endif
725
726	switch (uap->cmd) {
727
728	case F_DUPFD:
729	case F_DUPFD_CLOEXEC:
730		newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
731		AUDIT_ARG(value32, newmin);
732		if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
733		    newmin >= maxfiles) {
734			error = EINVAL;
735			goto out;
736		}
737		if ( (error = fdalloc(p, newmin, &i)) )
738			goto out;
739		error = finishdup(p, fdp, fd, i,
740		    uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
741		goto out;
742
743	case F_GETFD:
744		*retval = (*pop & UF_EXCLOSE)? 1 : 0;
745		error = 0;
746		goto out;
747
748	case F_SETFD:
749		AUDIT_ARG(value32, uap->arg);
750		*pop = (*pop &~ UF_EXCLOSE) |
751			(uap->arg & 1)? UF_EXCLOSE : 0;
752		error = 0;
753		goto out;
754
755	case F_GETFL:
756		*retval = OFLAGS(fp->f_flag);
757		error = 0;
758		goto out;
759
760	case F_SETFL:
761		fp->f_flag &= ~FCNTLFLAGS;
762		tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
763		AUDIT_ARG(value32, tmp);
764		fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS;
765		tmp = fp->f_flag & FNONBLOCK;
766		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
767		if (error)
768			goto out;
769		tmp = fp->f_flag & FASYNC;
770		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
771		if (!error)
772			goto out;
773		fp->f_flag &= ~FNONBLOCK;
774		tmp = 0;
775		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
776		goto out;
777
778	case F_GETOWN:
779		if (fp->f_type == DTYPE_SOCKET) {
780			*retval = ((struct socket *)fp->f_data)->so_pgid;
781			error = 0;
782			goto out;
783		}
784		error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, &context);
785		*retval = -*retval;
786		goto out;
787
788	case F_SETOWN:
789		tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
790		AUDIT_ARG(value32, tmp);
791		if (fp->f_type == DTYPE_SOCKET) {
792			((struct socket *)fp->f_data)->so_pgid = tmp;
793			error =0;
794			goto out;
795		}
796		if (fp->f_type == DTYPE_PIPE) {
797			error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
798			goto out;
799		}
800
801		if (tmp <= 0) {
802			tmp = -tmp;
803		} else {
804			proc_t p1 = proc_find(tmp);
805			if (p1 == 0) {
806				error = ESRCH;
807				goto out;
808			}
809			tmp = (int)p1->p_pgrpid;
810			proc_rele(p1);
811		}
812		error =  fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
813		goto out;
814
815	case F_SETNOSIGPIPE:
816		tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
817		if (fp->f_type == DTYPE_SOCKET) {
818			error = sock_setsockopt((struct socket *)fp->f_data,
819			    SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof (tmp));
820		} else {
821			struct fileglob *fg = fp->f_fglob;
822
823			lck_mtx_lock_spin(&fg->fg_lock);
824			if (tmp)
825				fg->fg_lflags |= FG_NOSIGPIPE;
826			else
827				fg->fg_lflags &= FG_NOSIGPIPE;
828			lck_mtx_unlock(&fg->fg_lock);
829			error = 0;
830		}
831		goto out;
832
833	case F_GETNOSIGPIPE:
834		if (fp->f_type == DTYPE_SOCKET) {
835			int retsize = sizeof (*retval);
836			error = sock_getsockopt((struct socket *)fp->f_data,
837			    SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
838		} else {
839			*retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
840				1 : 0;
841			error = 0;
842		}
843		goto out;
844
845	case F_SETLKW:
846		flg |= F_WAIT;
847		/* Fall into F_SETLK */
848
849	case F_SETLK:
850		if (fp->f_type != DTYPE_VNODE) {
851			error = EBADF;
852			goto out;
853		}
854		vp = (struct vnode *)fp->f_data;
855
856		fflag = fp->f_flag;
857		offset = fp->f_offset;
858		proc_fdunlock(p);
859
860		/* Copy in the lock structure */
861		error = copyin(argp, (caddr_t)&fl, sizeof(fl));
862		if (error) {
863			goto outdrop;
864		}
865
866		volatile off_t affected_lock_area_set = 0;
867		affected_lock_area_set = fl.l_start + offset;
868		if ((fl.l_whence == SEEK_CUR) && (affected_lock_area_set < fl.l_start)) {
869		    error = EOVERFLOW;
870		    goto outdrop;
871		}
872
873		if ( (error = vnode_getwithref(vp)) ) {
874			goto outdrop;
875		}
876		if (fl.l_whence == SEEK_CUR)
877			fl.l_start += offset;
878
879#if CONFIG_MACF
880		error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
881		    F_SETLK, &fl);
882		if (error) {
883			(void)vnode_put(vp);
884			goto outdrop;
885		}
886#endif
887		switch (fl.l_type) {
888
889		case F_RDLCK:
890			if ((fflag & FREAD) == 0) {
891				(void)vnode_put(vp);
892				error = EBADF;
893				goto outdrop;
894			}
895			// XXX UInt32 unsafe for LP64 kernel
896			OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
897			error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context);
898			(void)vnode_put(vp);
899			goto outdrop;
900
901		case F_WRLCK:
902			if ((fflag & FWRITE) == 0) {
903				(void)vnode_put(vp);
904				error = EBADF;
905				goto outdrop;
906			}
907			// XXX UInt32 unsafe for LP64 kernel
908			OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
909			error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context);
910			(void)vnode_put(vp);
911			goto outdrop;
912
913		case F_UNLCK:
914			error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
915				F_POSIX, &context);
916			(void)vnode_put(vp);
917			goto outdrop;
918
919		default:
920			(void)vnode_put(vp);
921			error = EINVAL;
922			goto outdrop;
923		}
924
925	case F_GETLK:
926#if CONFIG_EMBEDDED
927	case F_GETLKPID:
928#endif
929		if (fp->f_type != DTYPE_VNODE) {
930			error = EBADF;
931			goto out;
932		}
933		vp = (struct vnode *)fp->f_data;
934
935		offset = fp->f_offset;
936		proc_fdunlock(p);
937
938		/* Copy in the lock structure */
939		error = copyin(argp, (caddr_t)&fl, sizeof(fl));
940		if (error)
941			goto outdrop;
942
943		volatile off_t affected_lock_area_end = 0;
944		affected_lock_area_end = fl.l_start + offset;
945		/* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
946		/* and ending byte for EOVERFLOW in SEEK_SET */
947		if (((fl.l_whence == SEEK_CUR) &&
948		     ((affected_lock_area_end < fl.l_start) ||
949		      ((fl.l_len > 0) && (affected_lock_area_end + fl.l_len - 1 < affected_lock_area_end)))) ||
950		    ((fl.l_whence == SEEK_SET) && (fl.l_len > 0) && (fl.l_start + fl.l_len - 1 < fl.l_start)))
951		{
952			/* lf_advlock doesn't check start/end for F_GETLK if file has no locks */
953			error = EOVERFLOW;
954			goto outdrop;
955		}
956
957		if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
958			error = EINVAL;
959			goto outdrop;
960		}
961
962		switch (fl.l_type) {
963		case F_RDLCK:
964		case F_UNLCK:
965		case F_WRLCK:
966			break;
967		default:
968			error = EINVAL;
969			goto outdrop;
970		}
971
972		switch (fl.l_whence) {
973		case SEEK_CUR:
974		case SEEK_SET:
975		case SEEK_END:
976			break;
977		default:
978			error = EINVAL;
979			goto outdrop;
980		}
981
982		if ( (error = vnode_getwithref(vp)) == 0 ) {
983			if (fl.l_whence == SEEK_CUR)
984			        fl.l_start += offset;
985
986#if CONFIG_MACF
987			error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
988			    uap->cmd, &fl);
989			if (error == 0)
990#endif
991			error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context);
992
993			(void)vnode_put(vp);
994
995			if (error == 0)
996				error = copyout((caddr_t)&fl, argp, sizeof(fl));
997		}
998		goto outdrop;
999
1000	case F_PREALLOCATE: {
1001		fstore_t alloc_struct;    /* structure for allocate command */
1002		u_int32_t alloc_flags = 0;
1003
1004		if (fp->f_type != DTYPE_VNODE) {
1005			error = EBADF;
1006			goto out;
1007		}
1008
1009		vp = (struct vnode *)fp->f_data;
1010		proc_fdunlock(p);
1011
1012		/* make sure that we have write permission */
1013		if ((fp->f_flag & FWRITE) == 0) {
1014			error = EBADF;
1015			goto outdrop;
1016		}
1017
1018		error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1019		if (error)
1020			goto outdrop;
1021
1022		/* now set the space allocated to 0 */
1023		alloc_struct.fst_bytesalloc = 0;
1024
1025		/*
1026		 * Do some simple parameter checking
1027		 */
1028
1029		/* set up the flags */
1030
1031		alloc_flags |= PREALLOCATE;
1032
1033		if (alloc_struct.fst_flags & F_ALLOCATECONTIG)
1034			alloc_flags |= ALLOCATECONTIG;
1035
1036		if (alloc_struct.fst_flags & F_ALLOCATEALL)
1037			alloc_flags |= ALLOCATEALL;
1038
1039		/*
1040		 * Do any position mode specific stuff.  The only
1041		 * position mode  supported now is PEOFPOSMODE
1042		 */
1043
1044		switch (alloc_struct.fst_posmode) {
1045
1046		case F_PEOFPOSMODE:
1047			if (alloc_struct.fst_offset != 0) {
1048				error = EINVAL;
1049				goto outdrop;
1050			}
1051
1052			alloc_flags |= ALLOCATEFROMPEOF;
1053			break;
1054
1055		case F_VOLPOSMODE:
1056			if (alloc_struct.fst_offset <= 0) {
1057				error = EINVAL;
1058				goto outdrop;
1059			}
1060
1061			alloc_flags |= ALLOCATEFROMVOL;
1062			break;
1063
1064		default: {
1065			error = EINVAL;
1066			goto outdrop;
1067			}
1068		}
1069		if ( (error = vnode_getwithref(vp)) == 0 ) {
1070		        /*
1071			 * call allocate to get the space
1072			 */
1073		        error = VNOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags,
1074					      &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1075					      &context);
1076			(void)vnode_put(vp);
1077
1078			error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1079
1080			if (error == 0)
1081				error = error2;
1082		}
1083		goto outdrop;
1084
1085		}
1086	case F_SETSIZE:
1087		if (fp->f_type != DTYPE_VNODE) {
1088			error = EBADF;
1089			goto out;
1090		}
1091		vp = (struct vnode *)fp->f_data;
1092		proc_fdunlock(p);
1093
1094		error = copyin(argp, (caddr_t)&offset, sizeof (off_t));
1095		if (error)
1096			goto outdrop;
1097		AUDIT_ARG(value64, offset);
1098
1099		error = vnode_getwithref(vp);
1100		if (error)
1101			goto outdrop;
1102
1103#if CONFIG_MACF
1104		error = mac_vnode_check_truncate(&context,
1105		    fp->f_fglob->fg_cred, vp);
1106		if (error) {
1107			(void)vnode_put(vp);
1108			goto outdrop;
1109		}
1110#endif
1111		/*
1112		 * Make sure that we are root.  Growing a file
1113		 * without zero filling the data is a security hole
1114		 * root would have access anyway so we'll allow it
1115		 */
1116		if (!is_suser()) {
1117			error = EACCES;
1118		} else {
1119			/*
1120			 * set the file size
1121			 */
1122			error = vnode_setsize(vp, offset, IO_NOZEROFILL,
1123			    &context);
1124		}
1125
1126		(void)vnode_put(vp);
1127		goto outdrop;
1128
1129	case F_RDAHEAD:
1130		if (fp->f_type != DTYPE_VNODE) {
1131			error = EBADF;
1132			goto out;
1133		}
1134		if (uap->arg)
1135		        fp->f_fglob->fg_flag &= ~FNORDAHEAD;
1136		else
1137		        fp->f_fglob->fg_flag |= FNORDAHEAD;
1138
1139		goto out;
1140
1141	case F_NOCACHE:
1142		if (fp->f_type != DTYPE_VNODE) {
1143			error = EBADF;
1144			goto out;
1145		}
1146		if (uap->arg)
1147		        fp->f_fglob->fg_flag |= FNOCACHE;
1148		else
1149		        fp->f_fglob->fg_flag &= ~FNOCACHE;
1150
1151		goto out;
1152
1153	case F_NODIRECT:
1154		if (fp->f_type != DTYPE_VNODE) {
1155			error = EBADF;
1156			goto out;
1157		}
1158		if (uap->arg)
1159		        fp->f_fglob->fg_flag |= FNODIRECT;
1160		else
1161		        fp->f_fglob->fg_flag &= ~FNODIRECT;
1162
1163		goto out;
1164
1165	case F_SINGLE_WRITER:
1166		if (fp->f_type != DTYPE_VNODE) {
1167			error = EBADF;
1168			goto out;
1169		}
1170		if (uap->arg)
1171		        fp->f_fglob->fg_flag |= FSINGLE_WRITER;
1172		else
1173		        fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
1174
1175		goto out;
1176
1177	case F_GLOBAL_NOCACHE:
1178	        if (fp->f_type != DTYPE_VNODE) {
1179		        error = EBADF;
1180			goto out;
1181		}
1182		vp = (struct vnode *)fp->f_data;
1183		proc_fdunlock(p);
1184
1185		if ( (error = vnode_getwithref(vp)) == 0 ) {
1186
1187		        *retval = vnode_isnocache(vp);
1188
1189			if (uap->arg)
1190			        vnode_setnocache(vp);
1191			else
1192			        vnode_clearnocache(vp);
1193
1194			(void)vnode_put(vp);
1195		}
1196		goto outdrop;
1197
1198	case F_CHECK_OPENEVT:
1199	        if (fp->f_type != DTYPE_VNODE) {
1200		        error = EBADF;
1201			goto out;
1202		}
1203		vp = (struct vnode *)fp->f_data;
1204		proc_fdunlock(p);
1205
1206		if ( (error = vnode_getwithref(vp)) == 0 ) {
1207
1208		        *retval = vnode_is_openevt(vp);
1209
1210			if (uap->arg)
1211			        vnode_set_openevt(vp);
1212			else
1213			        vnode_clear_openevt(vp);
1214
1215			(void)vnode_put(vp);
1216		}
1217		goto outdrop;
1218
1219	case F_RDADVISE: {
1220		struct radvisory ra_struct;
1221
1222		if (fp->f_type != DTYPE_VNODE) {
1223			error = EBADF;
1224			goto out;
1225		}
1226		vp = (struct vnode *)fp->f_data;
1227		proc_fdunlock(p);
1228
1229		if ( (error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct))) )
1230			goto outdrop;
1231		if ( (error = vnode_getwithref(vp)) == 0 ) {
1232		        error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1233
1234			(void)vnode_put(vp);
1235		}
1236		goto outdrop;
1237		}
1238
1239        case F_FLUSH_DATA:
1240
1241                if (fp->f_type != DTYPE_VNODE) {
1242                        error = EBADF;
1243                        goto out;
1244                }
1245                vp = (struct vnode *)fp->f_data;
1246                proc_fdunlock(p);
1247
1248                if ( (error = vnode_getwithref(vp)) == 0 ) {
1249                        error = cluster_push(vp, 0);
1250
1251                        (void)vnode_put(vp);
1252                }
1253                goto outdrop;
1254
1255	case F_LOG2PHYS:
1256	case F_LOG2PHYS_EXT: {
1257		struct log2phys l2p_struct;    /* structure for allocate command */
1258		int devBlockSize;
1259
1260		off_t file_offset = 0;
1261		size_t a_size = 0;
1262		size_t run = 0;
1263
1264		if (uap->cmd == F_LOG2PHYS_EXT) {
1265			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1266			if (error)
1267				goto out;
1268			file_offset = l2p_struct.l2p_devoffset;
1269		} else {
1270			file_offset = fp->f_offset;
1271		}
1272		if (fp->f_type != DTYPE_VNODE) {
1273			error = EBADF;
1274			goto out;
1275		}
1276		vp = (struct vnode *)fp->f_data;
1277		proc_fdunlock(p);
1278		if ( (error = vnode_getwithref(vp)) ) {
1279			goto outdrop;
1280		}
1281		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1282		if (error) {
1283			(void)vnode_put(vp);
1284			goto outdrop;
1285		}
1286		error = VNOP_BLKTOOFF(vp, lbn, &offset);
1287		if (error) {
1288			(void)vnode_put(vp);
1289			goto outdrop;
1290		}
1291		devBlockSize = vfs_devblocksize(vnode_mount(vp));
1292		if (uap->cmd == F_LOG2PHYS_EXT) {
1293			a_size = l2p_struct.l2p_contigbytes;
1294		} else {
1295			a_size = devBlockSize;
1296		}
1297
1298		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1299
1300		(void)vnode_put(vp);
1301
1302		if (!error) {
1303			l2p_struct.l2p_flags = 0;	/* for now */
1304			if (uap->cmd == F_LOG2PHYS_EXT) {
1305				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1306			} else {
1307				l2p_struct.l2p_contigbytes = 0;	/* for now */
1308			}
1309			l2p_struct.l2p_devoffset = bn * devBlockSize;
1310			l2p_struct.l2p_devoffset += file_offset - offset;
1311			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1312		}
1313		goto outdrop;
1314		}
1315	case F_GETPATH: {
1316		char *pathbufp;
1317		int pathlen;
1318
1319		if (fp->f_type != DTYPE_VNODE) {
1320			error = EBADF;
1321			goto out;
1322		}
1323		vp = (struct vnode *)fp->f_data;
1324		proc_fdunlock(p);
1325
1326		pathlen = MAXPATHLEN;
1327		MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1328		if (pathbufp == NULL) {
1329			error = ENOMEM;
1330			goto outdrop;
1331		}
1332		if ( (error = vnode_getwithref(vp)) == 0 ) {
1333		        error = vn_getpath(vp, pathbufp, &pathlen);
1334		        (void)vnode_put(vp);
1335
1336			if (error == 0)
1337			        error = copyout((caddr_t)pathbufp, argp, pathlen);
1338		}
1339		FREE(pathbufp, M_TEMP);
1340		goto outdrop;
1341	}
1342
1343	case F_PATHPKG_CHECK: {
1344		char *pathbufp;
1345		size_t pathlen;
1346
1347		if (fp->f_type != DTYPE_VNODE) {
1348		        error = EBADF;
1349			goto out;
1350		}
1351		vp = (struct vnode *)fp->f_data;
1352		proc_fdunlock(p);
1353
1354		pathlen = MAXPATHLEN;
1355		pathbufp = kalloc(MAXPATHLEN);
1356
1357		if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0 ) {
1358		        if ( (error = vnode_getwithref(vp)) == 0 ) {
1359				AUDIT_ARG(text, pathbufp);
1360			        error = vn_path_package_check(vp, pathbufp, pathlen, retval);
1361
1362				(void)vnode_put(vp);
1363			}
1364		}
1365		kfree(pathbufp, MAXPATHLEN);
1366		goto outdrop;
1367	}
1368
1369	case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
1370	case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZECACHE
1371	case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
1372	case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
1373		if (fp->f_type != DTYPE_VNODE) {
1374			error = EBADF;
1375			goto out;
1376		}
1377		vp = (struct vnode *)fp->f_data;
1378		proc_fdunlock(p);
1379
1380		if ( (error = vnode_getwithref(vp)) == 0 ) {
1381		        error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1382
1383			(void)vnode_put(vp);
1384		}
1385		break;
1386	}
1387
1388	/*
1389	 * SPI (private) for opening a file starting from a dir fd
1390	 */
1391	case F_OPENFROM: {
1392		struct user_fopenfrom fopen;
1393		struct vnode_attr va;
1394		struct nameidata nd;
1395		int cmode;
1396
1397		/* Check if this isn't a valid file descriptor */
1398		if ((fp->f_type != DTYPE_VNODE) ||
1399		    (fp->f_flag & FREAD) == 0) {
1400			error = EBADF;
1401			goto out;
1402		}
1403		vp = (struct vnode *)fp->f_data;
1404		proc_fdunlock(p);
1405
1406		if (vnode_getwithref(vp)) {
1407			error = ENOENT;
1408			goto outdrop;
1409		}
1410
1411		/* Only valid for directories */
1412		if (vp->v_type != VDIR) {
1413			vnode_put(vp);
1414			error = ENOTDIR;
1415			goto outdrop;
1416		}
1417
1418		/* Get flags, mode and pathname arguments. */
1419		if (IS_64BIT_PROCESS(p)) {
1420			error = copyin(argp, &fopen, sizeof(fopen));
1421		} else {
1422			struct user32_fopenfrom fopen32;
1423
1424			error = copyin(argp, &fopen32, sizeof(fopen32));
1425			fopen.o_flags = fopen32.o_flags;
1426			fopen.o_mode = fopen32.o_mode;
1427			fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
1428		}
1429		if (error) {
1430			vnode_put(vp);
1431			goto outdrop;
1432		}
1433		AUDIT_ARG(fflags, fopen.o_flags);
1434		AUDIT_ARG(mode, fopen.o_mode);
1435		VATTR_INIT(&va);
1436		/* Mask off all but regular access permissions */
1437		cmode = ((fopen.o_mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1438		VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
1439
1440		/* Start the lookup relative to the file descriptor's vnode. */
1441		NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
1442		       fopen.o_pathname, &context);
1443		nd.ni_dvp = vp;
1444
1445		error = open1(&context, &nd, fopen.o_flags, &va, retval);
1446
1447		vnode_put(vp);
1448		break;
1449	}
1450	/*
1451	 * SPI (private) for unlinking a file starting from a dir fd
1452	 */
1453	case F_UNLINKFROM: {
1454		struct nameidata nd;
1455		user_addr_t pathname;
1456
1457		/* Check if this isn't a valid file descriptor */
1458		if ((fp->f_type != DTYPE_VNODE) ||
1459		    (fp->f_flag & FREAD) == 0) {
1460			error = EBADF;
1461			goto out;
1462		}
1463		vp = (struct vnode *)fp->f_data;
1464		proc_fdunlock(p);
1465
1466		if (vnode_getwithref(vp)) {
1467			error = ENOENT;
1468			goto outdrop;
1469		}
1470
1471		/* Only valid for directories */
1472		if (vp->v_type != VDIR) {
1473			vnode_put(vp);
1474			error = ENOTDIR;
1475			goto outdrop;
1476		}
1477
1478		/* Get flags, mode and pathname arguments. */
1479		if (IS_64BIT_PROCESS(p)) {
1480			pathname = (user_addr_t)argp;
1481		} else {
1482			pathname = CAST_USER_ADDR_T(argp);
1483		}
1484
1485		/* Start the lookup relative to the file descriptor's vnode. */
1486		NDINIT(&nd, DELETE, OP_UNLINK, USEDVP | AUDITVNPATH1, UIO_USERSPACE,
1487		       pathname, &context);
1488		nd.ni_dvp = vp;
1489
1490		error = unlink1(&context, &nd, 0);
1491
1492		vnode_put(vp);
1493		break;
1494
1495	}
1496
1497	case F_ADDSIGS:
1498	case F_ADDFILESIGS:
1499	{
1500		struct user_fsignatures fs;
1501		kern_return_t kr;
1502		vm_offset_t kernel_blob_addr;
1503		vm_size_t kernel_blob_size;
1504
1505		if (fp->f_type != DTYPE_VNODE) {
1506			error = EBADF;
1507			goto out;
1508		}
1509		vp = (struct vnode *)fp->f_data;
1510		proc_fdunlock(p);
1511		error = vnode_getwithref(vp);
1512		if (error)
1513			goto outdrop;
1514
1515		if (IS_64BIT_PROCESS(p)) {
1516			error = copyin(argp, &fs, sizeof (fs));
1517		} else {
1518			struct user32_fsignatures fs32;
1519
1520			error = copyin(argp, &fs32, sizeof (fs32));
1521			fs.fs_file_start = fs32.fs_file_start;
1522			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
1523			fs.fs_blob_size = fs32.fs_blob_size;
1524		}
1525
1526		if (error) {
1527			vnode_put(vp);
1528			goto outdrop;
1529		}
1530
1531		if(ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start))
1532		{
1533			/*
1534			if(cs_debug)
1535				printf("CODE SIGNING: resident blob offered for: %s\n", vp->v_name);
1536			 */
1537			vnode_put(vp);
1538			goto outdrop;
1539		}
1540
1541#define CS_MAX_BLOB_SIZE (1280ULL * 1024) /* max shared cache file XXX ? */
1542		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
1543			error = E2BIG;
1544			vnode_put(vp);
1545			goto outdrop;
1546		}
1547
1548		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
1549		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
1550		if (kr != KERN_SUCCESS) {
1551			error = ENOMEM;
1552			vnode_put(vp);
1553			goto outdrop;
1554		}
1555
1556		if(uap->cmd == F_ADDSIGS) {
1557			error = copyin(fs.fs_blob_start,
1558				       (void *) kernel_blob_addr,
1559				       kernel_blob_size);
1560		} else /* F_ADDFILESIGS */ {
1561			error = vn_rdwr(UIO_READ,
1562					vp,
1563					(caddr_t) kernel_blob_addr,
1564					kernel_blob_size,
1565					 fs.fs_file_start + fs.fs_blob_start,
1566					UIO_SYSSPACE,
1567					0,
1568					kauth_cred_get(),
1569					0,
1570					p);
1571		}
1572
1573		if (error) {
1574			ubc_cs_blob_deallocate(kernel_blob_addr,
1575					       kernel_blob_size);
1576			vnode_put(vp);
1577			goto outdrop;
1578		}
1579
1580		error = ubc_cs_blob_add(
1581			vp,
1582			CPU_TYPE_ANY,	/* not for a specific architecture */
1583			fs.fs_file_start,
1584			kernel_blob_addr,
1585			kernel_blob_size);
1586		if (error) {
1587			ubc_cs_blob_deallocate(kernel_blob_addr,
1588					       kernel_blob_size);
1589		} else {
1590			/* ubc_blob_add() has consumed "kernel_blob_addr" */
1591#if CHECK_CS_VALIDATION_BITMAP
1592			ubc_cs_validation_bitmap_allocate( vp );
1593#endif
1594		}
1595
1596		(void) vnode_put(vp);
1597		break;
1598	}
1599
1600	case F_MARKDEPENDENCY: {
1601		struct vnode_attr va;
1602		vfs_context_t ctx = vfs_context_current();
1603		kauth_cred_t cred;
1604
1605		if ((current_proc()->p_flag & P_DEPENDENCY_CAPABLE) == 0) {
1606		    error = EPERM;
1607		    goto out;
1608		}
1609
1610		if (fp->f_type != DTYPE_VNODE) {
1611			error = EBADF;
1612			goto out;
1613		}
1614
1615		vp = (struct vnode *)fp->f_data;
1616		proc_fdunlock(p);
1617
1618		if (vnode_getwithref(vp)) {
1619			error = ENOENT;
1620			goto outdrop;
1621		}
1622
1623		if (!vnode_isvroot(vp)) {
1624		    error = EINVAL;
1625		    vnode_put(vp);
1626		    goto outdrop;
1627		}
1628
1629		// get the owner of the root dir
1630		VATTR_INIT(&va);
1631		VATTR_WANTED(&va, va_uid);
1632		if (vnode_getattr(vp, &va, ctx) != 0) {
1633		    error = EINVAL;
1634		    vnode_put(vp);
1635		    goto outdrop;
1636		}
1637
1638		// and last, check that the caller is the super user or
1639		// the owner of the mount point
1640		cred = vfs_context_ucred(ctx);
1641		if (!is_suser() && va.va_uid != kauth_cred_getuid(cred)) {
1642			error = EACCES;
1643			vnode_put(vp);
1644			goto outdrop;
1645		}
1646
1647		// if all those checks pass then we can mark the dependency
1648		vfs_markdependency(vp->v_mount);
1649		error = 0;
1650
1651		vnode_put(vp);
1652
1653		break;
1654	}
1655
1656#if CONFIG_PROTECT
1657	case F_GETPROTECTIONCLASS: {
1658		int class = 0;
1659
1660		if (fp->f_type != DTYPE_VNODE) {
1661			error = EBADF;
1662			goto out;
1663		}
1664		vp = (struct vnode *)fp->f_data;
1665
1666		proc_fdunlock(p);
1667
1668		if (vnode_getwithref(vp)) {
1669			error = ENOENT;
1670			goto outdrop;
1671		}
1672
1673		error = cp_vnode_getclass (vp, &class);
1674		if (error == 0) {
1675			*retval = class;
1676		}
1677
1678		vnode_put(vp);
1679		break;
1680	}
1681
1682	case F_SETPROTECTIONCLASS: {
1683		/* tmp must be a valid PROTECTION_CLASS_* */
1684		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
1685
1686		if (fp->f_type != DTYPE_VNODE) {
1687			error = EBADF;
1688			goto out;
1689		}
1690		vp = (struct vnode *)fp->f_data;
1691
1692		proc_fdunlock(p);
1693
1694		if (vnode_getwithref(vp)) {
1695			error = ENOENT;
1696			goto outdrop;
1697		}
1698
1699		/* Only go forward if you have write access */
1700		vfs_context_t ctx = vfs_context_current();
1701		if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1702			vnode_put(vp);
1703			error = EBADF;
1704			goto outdrop;
1705		}
1706		error = cp_vnode_setclass (vp, tmp);
1707		vnode_put(vp);
1708		break;
1709	}
1710
1711	case F_TRANSCODEKEY: {
1712
1713		if (fp->f_type != DTYPE_VNODE) {
1714			error = EBADF;
1715			goto out;
1716		}
1717
1718		vp = (struct vnode *)fp->f_data;
1719		proc_fdunlock(p);
1720
1721		if (vnode_getwithref(vp)) {
1722			error = ENOENT;
1723			goto outdrop;
1724		}
1725
1726		error = cp_vnode_transcode (vp);
1727		vnode_put(vp);
1728		break;
1729	}
1730
1731	case F_GETPROTECTIONLEVEL:  {
1732		uint32_t cp_version = 0;
1733
1734		if (fp->f_type != DTYPE_VNODE) {
1735			error = EBADF;
1736			goto out;
1737		}
1738
1739		vp = (struct vnode*) fp->f_data;
1740		proc_fdunlock (p);
1741
1742		if (vnode_getwithref(vp)) {
1743			error = ENOENT;
1744			goto outdrop;
1745		}
1746
1747		/*
1748		 * if cp_get_major_vers fails, error will be set to proper errno
1749		 * and cp_version will still be 0.
1750		 */
1751
1752		error = cp_get_root_major_vers (vp, &cp_version);
1753		*retval = cp_version;
1754
1755		vnode_put (vp);
1756		break;
1757	}
1758
1759#endif /* CONFIG_PROTECT */
1760
1761	case F_MOVEDATAEXTENTS: {
1762		struct fileproc *fp2 = NULL;
1763		struct vnode *src_vp = NULLVP;
1764		struct vnode *dst_vp = NULLVP;
1765		/* We need to grab the 2nd FD out of the argments before moving on. */
1766		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
1767
1768		if (fp->f_type != DTYPE_VNODE) {
1769			error = EBADF;
1770			goto out;
1771		}
1772
1773		/* For now, special case HFS+ only, since this is SPI. */
1774		src_vp = (struct vnode *)fp->f_data;
1775		if (src_vp->v_tag != VT_HFS) {
1776			error = EINVAL;
1777			goto out;
1778		}
1779
1780		/*
1781		 * Get the references before we start acquiring iocounts on the vnodes,
1782		 * while we still hold the proc fd lock
1783		 */
1784		if ( (error = fp_lookup(p, fd2, &fp2, 1)) ) {
1785			error = EBADF;
1786			goto out;
1787		}
1788		if (fp2->f_type != DTYPE_VNODE) {
1789			fp_drop(p, fd2, fp2, 1);
1790			error = EBADF;
1791			goto out;
1792		}
1793		dst_vp = (struct vnode *)fp2->f_data;
1794		if (dst_vp->v_tag != VT_HFS) {
1795			fp_drop(p, fd2, fp2, 1);
1796			error = EINVAL;
1797			goto out;
1798		}
1799
1800#if CONFIG_MACF
1801		/* Re-do MAC checks against the new FD, pass in a fake argument */
1802		error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, 0);
1803		if (error) {
1804			fp_drop(p, fd2, fp2, 1);
1805			goto out;
1806		}
1807#endif
1808		/* Audit the 2nd FD */
1809		AUDIT_ARG(fd, fd2);
1810
1811		proc_fdunlock(p);
1812
1813		if (vnode_getwithref(src_vp)) {
1814			fp_drop(p, fd2, fp2, 0);
1815			error = ENOENT;
1816			goto outdrop;
1817		}
1818		if (vnode_getwithref(dst_vp)) {
1819			vnode_put (src_vp);
1820			fp_drop(p, fd2, fp2, 0);
1821			error = ENOENT;
1822			goto outdrop;
1823		}
1824
1825		/*
1826		 * Basic asserts; validate they are not the same and that
1827		 * both live on the same filesystem.
1828		 */
1829		if (dst_vp == src_vp) {
1830			vnode_put (src_vp);
1831			vnode_put (dst_vp);
1832			fp_drop (p, fd2, fp2, 0);
1833			error = EINVAL;
1834			goto outdrop;
1835		}
1836
1837		if (dst_vp->v_mount != src_vp->v_mount) {
1838			vnode_put (src_vp);
1839			vnode_put (dst_vp);
1840			fp_drop (p, fd2, fp2, 0);
1841			error = EXDEV;
1842			goto outdrop;
1843		}
1844
1845		/* Now we have a legit pair of FDs.  Go to work */
1846
1847		/* Now check for write access to the target files */
1848		if(vnode_authorize(src_vp, NULLVP,
1849						   (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
1850			vnode_put(src_vp);
1851			vnode_put(dst_vp);
1852			fp_drop(p, fd2, fp2, 0);
1853			error = EBADF;
1854			goto outdrop;
1855		}
1856
1857		if(vnode_authorize(dst_vp, NULLVP,
1858						   (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
1859			vnode_put(src_vp);
1860			vnode_put(dst_vp);
1861			fp_drop(p, fd2, fp2, 0);
1862			error = EBADF;
1863			goto outdrop;
1864		}
1865
1866		/* Verify that both vps point to files and not directories */
1867		if ( !vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
1868			error = EINVAL;
1869			vnode_put (src_vp);
1870			vnode_put (dst_vp);
1871			fp_drop (p, fd2, fp2, 0);
1872			goto outdrop;
1873		}
1874
1875		/*
1876		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
1877		 * We'll pass in our special bit indicating that the new behavior is expected
1878		 */
1879
1880		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
1881
1882		vnode_put (src_vp);
1883		vnode_put (dst_vp);
1884		fp_drop(p, fd2, fp2, 0);
1885		break;
1886	}
1887
1888
1889	/*
1890	 * SPI (private) for indicating to a filesystem that subsequent writes to
1891	 * the open FD will represent static content.
1892	 */
1893	case F_SETSTATICCONTENT: {
1894		caddr_t ioctl_arg = NULL;
1895
1896		if (uap->arg) {
1897			ioctl_arg = (caddr_t) 1;
1898		}
1899
1900		if (fp->f_type != DTYPE_VNODE) {
1901			error = EBADF;
1902			goto out;
1903		}
1904		vp = (struct vnode *)fp->f_data;
1905		proc_fdunlock(p);
1906
1907		error = vnode_getwithref(vp);
1908		if (error) {
1909			error = ENOENT;
1910			goto outdrop;
1911		}
1912
1913		/* Only go forward if you have write access */
1914		vfs_context_t ctx = vfs_context_current();
1915		if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1916			vnode_put(vp);
1917			error = EBADF;
1918			goto outdrop;
1919		}
1920
1921		error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
1922		(void)vnode_put(vp);
1923
1924		break;
1925	}
1926
1927	/*
1928	 * Set the vnode pointed to by 'fd'
1929	 * and tag it as the (potentially future) backing store
1930	 * for another filesystem
1931	 */
1932	case F_SETBACKINGSTORE: {
1933		if (fp->f_type != DTYPE_VNODE) {
1934			error = EBADF;
1935			goto out;
1936		}
1937
1938		vp = (struct vnode *)fp->f_data;
1939
1940		if (vp->v_tag != VT_HFS) {
1941			error = EINVAL;
1942			goto out;
1943		}
1944		proc_fdunlock(p);
1945
1946		if (vnode_getwithref(vp)) {
1947			error = ENOENT;
1948			goto outdrop;
1949		}
1950
1951		/* only proceed if you have write access */
1952		vfs_context_t ctx = vfs_context_current();
1953		if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1954			vnode_put(vp);
1955			error = EBADF;
1956			goto outdrop;
1957		}
1958
1959
1960		/* If arg != 0, set, otherwise unset */
1961		if (uap->arg) {
1962			error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)1, 0, &context);
1963		}
1964		else {
1965			error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)NULL, 0, &context);
1966		}
1967
1968		vnode_put(vp);
1969		break;
1970	}
1971
1972	/*
1973	 * like F_GETPATH, but special semantics for
1974	 * the mobile time machine handler.
1975	 */
1976	case F_GETPATH_MTMINFO: {
1977		char *pathbufp;
1978		int pathlen;
1979
1980		if (fp->f_type != DTYPE_VNODE) {
1981			error = EBADF;
1982			goto out;
1983		}
1984		vp = (struct vnode *)fp->f_data;
1985		proc_fdunlock(p);
1986
1987		pathlen = MAXPATHLEN;
1988		MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1989		if (pathbufp == NULL) {
1990			error = ENOMEM;
1991			goto outdrop;
1992		}
1993		if ( (error = vnode_getwithref(vp)) == 0 ) {
1994			int backingstore = 0;
1995
1996			/* Check for error from vn_getpath before moving on */
1997			if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
1998				if (vp->v_tag == VT_HFS) {
1999					error = VNOP_IOCTL (vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
2000				}
2001				(void)vnode_put(vp);
2002
2003				if (error == 0) {
2004					error = copyout((caddr_t)pathbufp, argp, pathlen);
2005				}
2006				if (error == 0) {
2007					/*
2008					 * If the copyout was successful, now check to ensure
2009					 * that this vnode is not a BACKINGSTORE vnode.  mtmd
2010					 * wants the path regardless.
2011					 */
2012					if (backingstore) {
2013						error = EBUSY;
2014					}
2015				}
2016			} else
2017				(void)vnode_put(vp);
2018		}
2019		FREE(pathbufp, M_TEMP);
2020		goto outdrop;
2021	}
2022
2023	default:
2024		/*
2025		 * This is an fcntl() that we d not recognize at this level;
2026		 * if this is a vnode, we send it down into the VNOP_IOCTL
2027		 * for this vnode; this can include special devices, and will
2028		 * effectively overload fcntl() to send ioctl()'s.
2029		 */
2030		if((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)){
2031                	error = EINVAL;
2032			goto out;
2033		}
2034
2035		if (fp->f_type != DTYPE_VNODE) {
2036			error = EBADF;
2037			goto out;
2038		}
2039		vp = (struct vnode *)fp->f_data;
2040		proc_fdunlock(p);
2041
2042		if ( (error = vnode_getwithref(vp)) == 0 ) {
2043#define STK_PARAMS 128
2044			char stkbuf[STK_PARAMS];
2045			unsigned int size;
2046			caddr_t data, memp;
2047			/*
2048			 * For this to work properly, we have to copy in the
2049			 * ioctl() cmd argument if there is one; we must also
2050			 * check that a command parameter, if present, does
2051			 * not exceed the maximum command length dictated by
2052			 * the number of bits we have available in the command
2053			 * to represent a structure length.  Finally, we have
2054			 * to copy the results back out, if it is that type of
2055			 * ioctl().
2056			 */
2057			size = IOCPARM_LEN(uap->cmd);
2058			if (size > IOCPARM_MAX) {
2059				(void)vnode_put(vp);
2060				error = EINVAL;
2061				break;
2062			}
2063
2064			memp = NULL;
2065			if (size > sizeof (stkbuf)) {
2066				if ((memp = (caddr_t)kalloc(size)) == 0) {
2067					(void)vnode_put(vp);
2068					error = ENOMEM;
2069					goto outdrop;
2070				}
2071				data = memp;
2072			} else {
2073				data = &stkbuf[0];
2074			}
2075
2076			if (uap->cmd & IOC_IN) {
2077				if (size) {
2078					/* structure */
2079					error = copyin(argp, data, size);
2080					if (error) {
2081						(void)vnode_put(vp);
2082						if (memp)
2083							kfree(memp, size);
2084						goto outdrop;
2085					}
2086				} else {
2087					/* int */
2088					if (is64bit) {
2089						*(user_addr_t *)data = argp;
2090					} else {
2091						*(uint32_t *)data = (uint32_t)argp;
2092					}
2093				};
2094			} else if ((uap->cmd & IOC_OUT) && size) {
2095				/*
2096				 * Zero the buffer so the user always
2097				 * gets back something deterministic.
2098				 */
2099				bzero(data, size);
2100			} else if (uap->cmd & IOC_VOID) {
2101				if (is64bit) {
2102				    *(user_addr_t *)data = argp;
2103				} else {
2104				    *(uint32_t *)data = (uint32_t)argp;
2105				}
2106			}
2107
2108			error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
2109
2110			(void)vnode_put(vp);
2111
2112			/* Copy any output data to user */
2113			if (error == 0 && (uap->cmd & IOC_OUT) && size)
2114				error = copyout(data, argp, size);
2115			if (memp)
2116				kfree(memp, size);
2117		}
2118		break;
2119	}
2120
2121outdrop:
2122	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2123	fp_drop(p, fd, fp, 0);
2124	return(error);
2125out:
2126	fp_drop(p, fd, fp, 1);
2127	proc_fdunlock(p);
2128	return(error);
2129}
2130
2131
2132/*
2133 * finishdup
2134 *
2135 * Description:	Common code for dup, dup2, and fcntl(F_DUPFD).
2136 *
2137 * Parameters:	p				Process performing the dup
2138 *		old				The fd to dup
2139 *		new				The fd to dup it to
2140 *		fd_flags			Flags to augment the new fd
2141 *		retval				Pointer to the call return area
2142 *
2143 * Returns:	0				Success
2144 *		EBADF
2145 *		ENOMEM
2146 *
2147 * Implicit returns:
2148 *		*retval (modified)		The new descriptor
2149 *
2150 * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
2151 *		the caller
2152 *
2153 * Notes:	This function may drop and reacquire this lock; it is unsafe
2154 *		for a caller to assume that other state protected by the lock
2155 *		has not been subsequently changed out from under it.
2156 */
2157int
2158finishdup(proc_t p,
2159    struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
2160{
2161	struct fileproc *nfp;
2162	struct fileproc *ofp;
2163#if CONFIG_MACF
2164	int error;
2165#endif
2166
2167#if DIAGNOSTIC
2168	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2169#endif
2170	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
2171	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
2172		fdrelse(p, new);
2173		return (EBADF);
2174	}
2175	fg_ref(ofp);
2176
2177#if CONFIG_MACF
2178	error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
2179	if (error) {
2180		fg_drop(ofp);
2181		fdrelse(p, new);
2182		return (error);
2183	}
2184#endif
2185
2186	proc_fdunlock(p);
2187
2188	MALLOC_ZONE(nfp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
2189	/* Failure check follows proc_fdlock() due to handling requirements */
2190
2191	proc_fdlock(p);
2192
2193	if (nfp == NULL) {
2194		fg_drop(ofp);
2195		fdrelse(p, new);
2196		return (ENOMEM);
2197	}
2198
2199	bzero(nfp, sizeof(struct fileproc));
2200
2201	nfp->f_flags = 0;
2202	nfp->f_fglob = ofp->f_fglob;
2203	nfp->f_iocount = 0;
2204
2205#if DIAGNOSTIC
2206	if (fdp->fd_ofiles[new] != 0)
2207		panic("finishdup: overwriting fd_ofiles with new %d", new);
2208	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
2209		panic("finishdup: unreserved fileflags with new %d", new);
2210#endif
2211
2212	if (new > fdp->fd_lastfile)
2213		fdp->fd_lastfile = new;
2214	*fdflags(p, new) |= fd_flags;
2215	procfdtbl_releasefd(p, new, nfp);
2216	*retval = new;
2217	return (0);
2218}
2219
2220
2221/*
2222 * close
2223 *
2224 * Description:	The implementation of the close(2) system call
2225 *
2226 * Parameters:	p			Process in whose per process file table
2227 *					the close is to occur
2228 *		uap->fd			fd to be closed
2229 *		retval			<unused>
2230 *
2231 * Returns:	0			Success
2232 *	fp_lookup:EBADF			Bad file descriptor
2233 *	close_internal:EBADF
2234 *	close_internal:??? 		Anything returnable by a per-fileops
2235 *					close function
2236 */
2237int
2238close(proc_t p, struct close_args *uap, int32_t *retval)
2239{
2240	__pthread_testcancel(1);
2241	return(close_nocancel(p, (struct close_nocancel_args *)uap, retval));
2242}
2243
2244
2245int
2246close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
2247{
2248	struct fileproc *fp;
2249	int fd = uap->fd;
2250	int error =0;
2251
2252	AUDIT_SYSCLOSE(p, fd);
2253
2254	proc_fdlock(p);
2255
2256	if ( (error = fp_lookup(p,fd,&fp, 1)) ) {
2257		proc_fdunlock(p);
2258		return(error);
2259	}
2260
2261	error = close_internal_locked(p, fd, fp, 0);
2262
2263	proc_fdunlock(p);
2264
2265	return(error);
2266}
2267
2268
2269/*
2270 * close_internal_locked
2271 *
2272 * Close a file descriptor.
2273 *
2274 * Parameters:	p			Process in whose per process file table
2275 *					the close is to occur
2276 *		fd			fd to be closed
2277 *		fp			fileproc associated with the fd
2278 *
2279 * Returns:	0			Success
2280 *		EBADF			fd already in close wait state
2281 *	closef_locked:??? 		Anything returnable by a per-fileops
2282 *					close function
2283 *
2284 * Locks:	Assumes proc_fdlock for process is held by the caller and returns
2285 *		with lock held
2286 *
2287 * Notes:	This function may drop and reacquire this lock; it is unsafe
2288 *		for a caller to assume that other state protected by the lock
2289 *		has not been subsequently changes out from under it, if the
2290 *		caller made the call with the lock held.
2291 */
2292static int
2293close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
2294{
2295	struct filedesc *fdp = p->p_fd;
2296	int error =0;
2297	int resvfd = flags & FD_DUP2RESV;
2298
2299
2300#if DIAGNOSTIC
2301	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2302#endif
2303
2304	/* Keep people from using the filedesc while we are closing it */
2305	procfdtbl_markclosefd(p, fd);
2306
2307
2308	if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
2309		panic("close_internal_locked: being called on already closing fd");
2310	}
2311
2312
2313#if DIAGNOSTIC
2314	if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
2315		panic("close_internal: unreserved fileflags with fd %d", fd);
2316#endif
2317
2318	fp->f_flags |= FP_CLOSING;
2319
2320	if ( (fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners() ) {
2321
2322	        proc_fdunlock(p);
2323
2324		if ( (fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners() ) {
2325		        /*
2326			 * call out to allow 3rd party notification of close.
2327			 * Ignore result of kauth_authorize_fileop call.
2328			 */
2329		        if (vnode_getwithref((vnode_t)fp->f_data) == 0) {
2330		        	u_int	fileop_flags = 0;
2331		        	if ((fp->f_flags & FP_WRITTEN) != 0)
2332		        		fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
2333			        kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE,
2334						       (uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
2335				vnode_put((vnode_t)fp->f_data);
2336			}
2337		}
2338		if (fp->f_flags & FP_AIOISSUED)
2339		        /*
2340			 * cancel all async IO requests that can be cancelled.
2341			 */
2342		        _aio_close( p, fd );
2343
2344		proc_fdlock(p);
2345	}
2346
2347	if (fd < fdp->fd_knlistsize)
2348		knote_fdclose(p, fd);
2349
2350	if (fp->f_flags & FP_WAITEVENT)
2351		(void)waitevent_close(p, fp);
2352
2353	if ((fp->f_flags & FP_INCHRREAD) == 0)
2354		fileproc_drain(p, fp);
2355
2356	if (resvfd == 0) {
2357		_fdrelse(p, fd);
2358	} else {
2359		procfdtbl_reservefd(p, fd);
2360	}
2361
2362	error = closef_locked(fp, fp->f_fglob, p);
2363	if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE)
2364		wakeup(&fp->f_flags);
2365	fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING);
2366
2367	proc_fdunlock(p);
2368
2369	FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
2370
2371	proc_fdlock(p);
2372
2373#if DIAGNOSTIC
2374	if (resvfd != 0) {
2375		if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
2376			panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
2377	}
2378#endif
2379
2380	return(error);
2381}
2382
2383
2384/*
2385 * fstat1
2386 *
2387 * Description:	Return status information about a file descriptor.
2388 *
2389 * Parameters:	p				The process doing the fstat
2390 *		fd				The fd to stat
2391 *		ub				The user stat buffer
2392 *		xsecurity			The user extended security
2393 *						buffer, or 0 if none
2394 *		xsecurity_size			The size of xsecurity, or 0
2395 *						if no xsecurity
2396 *		isstat64			Flag to indicate 64 bit version
2397 *						for inode size, etc.
2398 *
2399 * Returns:	0				Success
2400 *		EBADF
2401 *		EFAULT
2402 *	fp_lookup:EBADF				Bad file descriptor
2403 *	vnode_getwithref:???
2404 *	copyout:EFAULT
2405 *	vnode_getwithref:???
2406 *	vn_stat:???
2407 *	soo_stat:???
2408 *	pipe_stat:???
2409 *	pshm_stat:???
2410 *	kqueue_stat:???
2411 *
2412 * Notes:	Internal implementation for all other fstat() related
2413 *		functions
2414 *
2415 *		XXX switch on node type is bogus; need a stat in struct
2416 *		XXX fileops instead.
2417 */
2418static int
2419fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
2420{
2421	struct fileproc *fp;
2422	union {
2423		struct stat sb;
2424		struct stat64 sb64;
2425	} source;
2426	union {
2427		struct user64_stat user64_sb;
2428		struct user32_stat user32_sb;
2429		struct user64_stat64 user64_sb64;
2430		struct user32_stat64 user32_sb64;
2431	} dest;
2432	int error, my_size;
2433	int funnel_state;
2434	file_type_t type;
2435	caddr_t data;
2436	kauth_filesec_t fsec;
2437	user_size_t xsecurity_bufsize;
2438	vfs_context_t ctx = vfs_context_current();
2439	void * sbptr;
2440
2441
2442	AUDIT_ARG(fd, fd);
2443
2444	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
2445		return(error);
2446	}
2447	type = fp->f_type;
2448	data = fp->f_data;
2449	fsec = KAUTH_FILESEC_NONE;
2450
2451	sbptr = (void *)&source;
2452
2453	switch (type) {
2454
2455	case DTYPE_VNODE:
2456		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
2457			/*
2458			 * If the caller has the file open, and is not
2459			 * requesting extended security information, we are
2460			 * going to let them get the basic stat information.
2461			 */
2462			if (xsecurity == USER_ADDR_NULL) {
2463				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx);
2464			} else {
2465				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, ctx);
2466			}
2467
2468			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
2469			(void)vnode_put((vnode_t)data);
2470		}
2471		break;
2472
2473#if SOCKETS
2474	case DTYPE_SOCKET:
2475		error = soo_stat((struct socket *)data, sbptr, isstat64);
2476		break;
2477#endif /* SOCKETS */
2478
2479	case DTYPE_PIPE:
2480		error = pipe_stat((void *)data, sbptr, isstat64);
2481		break;
2482
2483	case DTYPE_PSXSHM:
2484		error = pshm_stat((void *)data, sbptr, isstat64);
2485		break;
2486
2487	case DTYPE_KQUEUE:
2488	        funnel_state = thread_funnel_set(kernel_flock, TRUE);
2489		error = kqueue_stat(fp, sbptr, isstat64, p);
2490		thread_funnel_set(kernel_flock, funnel_state);
2491		break;
2492
2493	default:
2494		error = EBADF;
2495		goto out;
2496	}
2497	if (error == 0) {
2498		caddr_t sbp;
2499
2500		if (isstat64 != 0) {
2501			source.sb64.st_lspare = 0;
2502			source.sb64.st_qspare[0] = 0LL;
2503			source.sb64.st_qspare[1] = 0LL;
2504
2505			if (IS_64BIT_PROCESS(current_proc())) {
2506				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
2507				my_size = sizeof(dest.user64_sb64);
2508				sbp = (caddr_t)&dest.user64_sb64;
2509			} else {
2510				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
2511				my_size = sizeof(dest.user32_sb64);
2512				sbp = (caddr_t)&dest.user32_sb64;
2513			}
2514		} else {
2515			source.sb.st_lspare = 0;
2516			source.sb.st_qspare[0] = 0LL;
2517			source.sb.st_qspare[1] = 0LL;
2518			if (IS_64BIT_PROCESS(current_proc())) {
2519				munge_user64_stat(&source.sb, &dest.user64_sb);
2520				my_size = sizeof(dest.user64_sb);
2521				sbp = (caddr_t)&dest.user64_sb;
2522			} else {
2523				munge_user32_stat(&source.sb, &dest.user32_sb);
2524				my_size = sizeof(dest.user32_sb);
2525				sbp = (caddr_t)&dest.user32_sb;
2526			}
2527		}
2528
2529		error = copyout(sbp, ub, my_size);
2530	}
2531
2532	/* caller wants extended security information? */
2533	if (xsecurity != USER_ADDR_NULL) {
2534
2535		/* did we get any? */
2536		 if (fsec == KAUTH_FILESEC_NONE) {
2537			if (susize(xsecurity_size, 0) != 0) {
2538				error = EFAULT;
2539				goto out;
2540			}
2541		} else {
2542			/* find the user buffer size */
2543			xsecurity_bufsize = fusize(xsecurity_size);
2544
2545			/* copy out the actual data size */
2546			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
2547				error = EFAULT;
2548				goto out;
2549			}
2550
2551			/* if the caller supplied enough room, copy out to it */
2552			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
2553				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
2554		}
2555	}
2556out:
2557	fp_drop(p, fd, fp, 0);
2558	if (fsec != NULL)
2559		kauth_filesec_free(fsec);
2560	return (error);
2561}
2562
2563
2564/*
2565 * fstat_extended
2566 *
2567 * Description:	Extended version of fstat supporting returning extended
2568 *		security information
2569 *
2570 * Parameters:	p				The process doing the fstat
2571 *		uap->fd				The fd to stat
2572 *		uap->ub				The user stat buffer
2573 *		uap->xsecurity			The user extended security
2574 *						buffer, or 0 if none
2575 *		uap->xsecurity_size		The size of xsecurity, or 0
2576 *
2577 * Returns:	0				Success
2578 *		!0				Errno (see fstat1)
2579 */
2580int
2581fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
2582{
2583	return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
2584}
2585
2586
2587/*
2588 * fstat
2589 *
2590 * Description:	Get file status for the file associated with fd
2591 *
2592 * Parameters:	p				The process doing the fstat
2593 *		uap->fd				The fd to stat
2594 *		uap->ub				The user stat buffer
2595 *
2596 * Returns:	0				Success
2597 *		!0				Errno (see fstat1)
2598 */
2599int
2600fstat(proc_t p, register struct fstat_args *uap, __unused int32_t *retval)
2601{
2602	return(fstat1(p, uap->fd, uap->ub, 0, 0, 0));
2603}
2604
2605
2606/*
2607 * fstat64_extended
2608 *
2609 * Description:	Extended version of fstat64 supporting returning extended
2610 *		security information
2611 *
2612 * Parameters:	p				The process doing the fstat
2613 *		uap->fd				The fd to stat
2614 *		uap->ub				The user stat buffer
2615 *		uap->xsecurity			The user extended security
2616 *						buffer, or 0 if none
2617 *		uap->xsecurity_size		The size of xsecurity, or 0
2618 *
2619 * Returns:	0				Success
2620 *		!0				Errno (see fstat1)
2621 */
2622int
2623fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
2624{
2625	return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
2626}
2627
2628
2629/*
2630 * fstat64
2631 *
2632 * Description:	Get 64 bit version of the file status for the file associated
2633 *		with fd
2634 *
2635 * Parameters:	p				The process doing the fstat
2636 *		uap->fd				The fd to stat
2637 *		uap->ub				The user stat buffer
2638 *
2639 * Returns:	0				Success
2640 *		!0				Errno (see fstat1)
2641 */
2642int
2643fstat64(proc_t p, register struct fstat64_args *uap, __unused int32_t *retval)
2644{
2645	return(fstat1(p, uap->fd, uap->ub, 0, 0, 1));
2646}
2647
2648
2649/*
2650 * fpathconf
2651 *
2652 * Description:	Return pathconf information about a file descriptor.
2653 *
2654 * Parameters:	p				Process making the request
2655 *		uap->fd				fd to get information about
2656 *		uap->name			Name of information desired
2657 *		retval				Pointer to the call return area
2658 *
2659 * Returns:	0				Success
2660 *		EINVAL
2661 *	fp_lookup:EBADF				Bad file descriptor
2662 *	vnode_getwithref:???
2663 *	vn_pathconf:???
2664 *
2665 * Implicit returns:
2666 *		*retval (modified)		Returned information (numeric)
2667 */
2668int
2669fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
2670{
2671	int fd = uap->fd;
2672	struct fileproc *fp;
2673	struct vnode *vp;
2674	int error = 0;
2675	file_type_t type;
2676	caddr_t data;
2677
2678
2679	AUDIT_ARG(fd, uap->fd);
2680	if ( (error = fp_lookup(p, fd, &fp, 0)) )
2681		return(error);
2682	type = fp->f_type;
2683	data = fp->f_data;
2684
2685	switch (type) {
2686
2687	case DTYPE_SOCKET:
2688		if (uap->name != _PC_PIPE_BUF) {
2689			error = EINVAL;
2690			goto out;
2691		}
2692		*retval = PIPE_BUF;
2693		error = 0;
2694		goto out;
2695
2696	case DTYPE_PIPE:
2697		if (uap->name != _PC_PIPE_BUF) {
2698			error = EINVAL;
2699			goto out;
2700		}
2701		*retval = PIPE_BUF;
2702		error = 0;
2703		goto out;
2704
2705	case DTYPE_VNODE:
2706		vp = (struct vnode *)data;
2707
2708		if ( (error = vnode_getwithref(vp)) == 0) {
2709		        AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2710
2711			error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
2712
2713			(void)vnode_put(vp);
2714		}
2715		goto out;
2716
2717	case DTYPE_PSXSHM:
2718	case DTYPE_PSXSEM:
2719	case DTYPE_KQUEUE:
2720	case DTYPE_FSEVENTS:
2721		error = EINVAL;
2722		goto out;
2723
2724	}
2725	/*NOTREACHED*/
2726out:
2727	fp_drop(p, fd, fp, 0);
2728	return(error);
2729}
2730
2731/*
2732 * Statistics counter for the number of times a process calling fdalloc()
2733 * has resulted in an expansion of the per process open file table.
2734 *
2735 * XXX This would likely be of more use if it were per process
2736 */
2737int fdexpand;
2738
2739
2740/*
2741 * fdalloc
2742 *
2743 * Description:	Allocate a file descriptor for the process.
2744 *
2745 * Parameters:	p				Process to allocate the fd in
2746 *		want				The fd we would prefer to get
2747 *		result				Pointer to fd we got
2748 *
2749 * Returns:	0				Success
2750 *		EMFILE
2751 *		ENOMEM
2752 *
2753 * Implicit returns:
2754 *		*result (modified)		The fd which was allocated
2755 */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = p->p_fd;
	int i;
	int lim, last, numfiles, oldnfiles;
	struct fileproc **newofiles, **ofiles;
	char *newofileflags;

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/* Upper bound on fds: the smaller of the soft rlimit and maxfiles */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	for (;;) {
		/* Only scan slots that exist in the table and are below lim */
		last = min(fdp->fd_nfiles, lim);
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		for (; i < last; i++) {
			/* A usable slot is both empty and not reserved */
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				procfdtbl_reservefd(p, i);
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				/*
				 * Advance the free-slot hint only when the
				 * search started at or below it.
				 */
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (fdp->fd_nfiles >= lim)
			return (EMFILE);
		/* Grow to NDEXTENT entries at first, then double each time */
		if (fdp->fd_nfiles < NDEXTENT)
			numfiles = NDEXTENT;
		else
			numfiles = 2 * fdp->fd_nfiles;
		/* Enforce lim */
		if (numfiles > lim)
			numfiles = lim;
		/*
		 * The fd lock is dropped around the allocation and retaken
		 * afterwards, so the table may have changed in between.
		 * NOTE(review): presumably dropped because M_WAITOK can
		 * block -- confirm.
		 */
		proc_fdunlock(p);
		MALLOC_ZONE(newofiles, struct fileproc **,
				numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
		proc_fdlock(p);
		if (newofiles == NULL) {
			return (ENOMEM);
		}
		/*
		 * If the table is already at least as large as the one just
		 * allocated, discard the new allocation and search again.
		 */
		if (fdp->fd_nfiles >= numfiles) {
			FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
			continue;
		}
		/*
		 * The flags array starts immediately after the last fileproc
		 * pointer, inside the same allocation.
		 */
		newofileflags = (char *) &newofiles[numfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		(void) memcpy(newofiles, fdp->fd_ofiles,
				oldnfiles * sizeof(*fdp->fd_ofiles));
		(void) memset(&newofiles[oldnfiles], 0,
				(numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

		(void) memcpy(newofileflags, fdp->fd_ofileflags,
				oldnfiles * sizeof(*fdp->fd_ofileflags));
		(void) memset(&newofileflags[oldnfiles], 0,
				(numfiles - oldnfiles) *
						sizeof(*fdp->fd_ofileflags));
		/* Install the new table, then free the old one */
		ofiles = fdp->fd_ofiles;
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
		fdexpand++;
		/* Loop back and retry the search in the enlarged table */
	}
}
2838
2839
2840/*
2841 * fdavail
2842 *
2843 * Description:	Check to see whether n user file descriptors are available
2844 *		to the process p.
2845 *
2846 * Parameters:	p				Process to check in
2847 *		n				The number of fd's desired
2848 *
2849 * Returns:	0				No
2850 *		1				Yes
2851 *
2852 * Locks:	Assumes proc_fdlock for process is held by the caller
2853 *
2854 * Notes:	The answer only remains valid so long as the proc_fdlock is
2855 *		held by the caller.
2856 */
2857int
2858fdavail(proc_t p, int n)
2859{
2860	struct filedesc *fdp = p->p_fd;
2861	struct fileproc **fpp;
2862	char *flags;
2863	int i, lim;
2864
2865	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
2866	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
2867		return (1);
2868	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
2869	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
2870	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++)
2871		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0)
2872			return (1);
2873	return (0);
2874}
2875
2876
2877/*
2878 * fdrelse
2879 *
2880 * Description:	Legacy KPI wrapper function for _fdrelse
2881 *
2882 * Parameters:	p				Process in which fd lives
2883 *		fd				fd to free
2884 *
2885 * Returns:	void
2886 *
2887 * Locks:	Assumes proc_fdlock for process is held by the caller
2888 */
void
fdrelse(proc_t p, int fd)
{
	/* Pure pass-through: forward both arguments unchanged to _fdrelse() */
	_fdrelse(p, fd);
}
2894
2895
2896/*
2897 * fdgetf_noref
2898 *
2899 * Description:	Get the fileproc pointer for the given fd from the per process
2900 *		open file table without taking an explicit reference on it.
2901 *
2902 * Parameters:	p				Process containing fd
2903 *		fd				fd to obtain fileproc for
2904 *		resultfp			Pointer to pointer return area
2905 *
2906 * Returns:	0				Success
2907 *		EBADF
2908 *
2909 * Implicit returns:
2910 *		*resultfp (modified)		Pointer to fileproc pointer
2911 *
2912 * Locks:	Assumes proc_fdlock for process is held by the caller
2913 *
2914 * Notes:	Because there is no reference explicitly taken, the returned
2915 *		fileproc pointer is only valid so long as the proc_fdlock
2916 *		remains held by the caller.
2917 */
2918int
2919fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
2920{
2921	struct filedesc *fdp = p->p_fd;
2922	struct fileproc *fp;
2923
2924	if (fd < 0 || fd >= fdp->fd_nfiles ||
2925			(fp = fdp->fd_ofiles[fd]) == NULL ||
2926			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
2927		return (EBADF);
2928	}
2929	if (resultfp)
2930		*resultfp = fp;
2931	return (0);
2932}
2933
2934
2935/*
2936 * fp_getfvp
2937 *
2938 * Description:	Get fileproc and vnode pointer for a given fd from the per
2939 *		process open file table of the specified process, and if
2940 *		successful, increment the f_iocount
2941 *
2942 * Parameters:	p				Process in which fd lives
2943 *		fd				fd to get information for
2944 *		resultfp			Pointer to result fileproc
2945 *						pointer area, or 0 if none
2946 *		resultvp			Pointer to result vnode pointer
2947 *						area, or 0 if none
2948 *
2949 * Returns:	0				Success
2950 *		EBADF				Bad file descriptor
2951 *		ENOTSUP				fd does not refer to a vnode
2952 *
2953 * Implicit returns:
2954 *		*resultfp (modified)		Fileproc pointer
2955 *		*resultvp (modified)		vnode pointer
2956 *
2957 * Notes:	The resultfp and resultvp fields are optional, and may be
2958 *		independently specified as NULL to skip returning information
2959 *
2960 * Locks:	Internally takes and releases proc_fdlock
2961 */
2962int
2963fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
2964{
2965	struct filedesc *fdp = p->p_fd;
2966	struct fileproc *fp;
2967
2968	proc_fdlock_spin(p);
2969	if (fd < 0 || fd >= fdp->fd_nfiles ||
2970			(fp = fdp->fd_ofiles[fd]) == NULL ||
2971			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
2972		proc_fdunlock(p);
2973		return (EBADF);
2974	}
2975	if (fp->f_type != DTYPE_VNODE) {
2976		proc_fdunlock(p);
2977		return(ENOTSUP);
2978	}
2979	fp->f_iocount++;
2980
2981	if (resultfp)
2982		*resultfp = fp;
2983	if (resultvp)
2984		*resultvp = (struct vnode *)fp->f_data;
2985	proc_fdunlock(p);
2986
2987	return (0);
2988}
2989
2990
2991/*
2992 * fp_getfvpandvid
2993 *
2994 * Description:	Get fileproc, vnode pointer, and vid for a given fd from the
2995 *		per process open file table of the specified process, and if
2996 *		successful, increment the f_iocount
2997 *
2998 * Parameters:	p				Process in which fd lives
2999 *		fd				fd to get information for
3000 *		resultfp			Pointer to result fileproc
3001 *						pointer area, or 0 if none
3002 *		resultvp			Pointer to result vnode pointer
3003 *						area, or 0 if none
 *		vidp				Pointer to result vid area
3005 *
3006 * Returns:	0				Success
3007 *		EBADF				Bad file descriptor
3008 *		ENOTSUP				fd does not refer to a vnode
3009 *
3010 * Implicit returns:
3011 *		*resultfp (modified)		Fileproc pointer
3012 *		*resultvp (modified)		vnode pointer
3013 *		*vidp				vid value
3014 *
3015 * Notes:	The resultfp and resultvp fields are optional, and may be
3016 *		independently specified as NULL to skip returning information
3017 *
3018 * Locks:	Internally takes and releases proc_fdlock
3019 */
3020int
3021fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
3022		struct vnode **resultvp, uint32_t *vidp)
3023{
3024	struct filedesc *fdp = p->p_fd;
3025	struct fileproc *fp;
3026
3027	proc_fdlock_spin(p);
3028	if (fd < 0 || fd >= fdp->fd_nfiles ||
3029			(fp = fdp->fd_ofiles[fd]) == NULL ||
3030			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3031		proc_fdunlock(p);
3032		return (EBADF);
3033	}
3034	if (fp->f_type != DTYPE_VNODE) {
3035		proc_fdunlock(p);
3036		return(ENOTSUP);
3037	}
3038	fp->f_iocount++;
3039
3040	if (resultfp)
3041		*resultfp = fp;
3042	if (resultvp)
3043		*resultvp = (struct vnode *)fp->f_data;
3044	if (vidp)
3045		*vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data);
3046	proc_fdunlock(p);
3047
3048	return (0);
3049}
3050
3051
3052/*
3053 * fp_getfsock
3054 *
3055 * Description:	Get fileproc and socket pointer for a given fd from the
3056 *		per process open file table of the specified process, and if
3057 *		successful, increment the f_iocount
3058 *
3059 * Parameters:	p				Process in which fd lives
3060 *		fd				fd to get information for
3061 *		resultfp			Pointer to result fileproc
3062 *						pointer area, or 0 if none
3063 *		results				Pointer to result socket
3064 *						pointer area, or 0 if none
3065 *
3066 * Returns:	EBADF			The file descriptor is invalid
3067 *		EOPNOTSUPP		The file descriptor is not a socket
3068 *		0			Success
3069 *
3070 * Implicit returns:
3071 *		*resultfp (modified)		Fileproc pointer
3072 *		*results (modified)		socket pointer
3073 *
3074 * Notes:	EOPNOTSUPP should probably be ENOTSOCK; this function is only
3075 *		ever called from accept1().
3076 */
3077int
3078fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
3079	    struct socket **results)
3080{
3081	struct filedesc *fdp = p->p_fd;
3082	struct fileproc *fp;
3083
3084	proc_fdlock_spin(p);
3085	if (fd < 0 || fd >= fdp->fd_nfiles ||
3086			(fp = fdp->fd_ofiles[fd]) == NULL ||
3087			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3088		proc_fdunlock(p);
3089		return (EBADF);
3090	}
3091	if (fp->f_type != DTYPE_SOCKET) {
3092		proc_fdunlock(p);
3093		return(EOPNOTSUPP);
3094	}
3095	fp->f_iocount++;
3096
3097	if (resultfp)
3098		*resultfp = fp;
3099	if (results)
3100		*results = (struct socket *)fp->f_data;
3101	proc_fdunlock(p);
3102
3103	return (0);
3104}
3105
3106
3107/*
3108 * fp_getfkq
3109 *
3110 * Description:	Get fileproc and kqueue pointer for a given fd from the
3111 *		per process open file table of the specified process, and if
3112 *		successful, increment the f_iocount
3113 *
3114 * Parameters:	p				Process in which fd lives
3115 *		fd				fd to get information for
3116 *		resultfp			Pointer to result fileproc
3117 *						pointer area, or 0 if none
3118 *		resultkq			Pointer to result kqueue
3119 *						pointer area, or 0 if none
3120 *
3121 * Returns:	EBADF			The file descriptor is invalid
 *		EBADF			The file descriptor is not a kqueue
3123 *		0			Success
3124 *
3125 * Implicit returns:
3126 *		*resultfp (modified)		Fileproc pointer
3127 *		*resultkq (modified)		kqueue pointer
3128 *
3129 * Notes:	The second EBADF should probably be something else to make
3130 *		the error condition distinct.
3131 */
3132int
3133fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
3134	  struct kqueue **resultkq)
3135{
3136	struct filedesc *fdp = p->p_fd;
3137	struct fileproc *fp;
3138
3139	proc_fdlock_spin(p);
3140	if ( fd < 0 || fd >= fdp->fd_nfiles ||
3141			(fp = fdp->fd_ofiles[fd]) == NULL ||
3142			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3143		proc_fdunlock(p);
3144		return (EBADF);
3145	}
3146	if (fp->f_type != DTYPE_KQUEUE) {
3147		proc_fdunlock(p);
3148		return(EBADF);
3149	}
3150	fp->f_iocount++;
3151
3152	if (resultfp)
3153		*resultfp = fp;
3154	if (resultkq)
3155		*resultkq = (struct kqueue *)fp->f_data;
3156	proc_fdunlock(p);
3157
3158	return (0);
3159}
3160
3161
3162/*
3163 * fp_getfpshm
3164 *
3165 * Description:	Get fileproc and POSIX shared memory pointer for a given fd
3166 *		from the per process open file table of the specified process
3167 *		and if successful, increment the f_iocount
3168 *
3169 * Parameters:	p				Process in which fd lives
3170 *		fd				fd to get information for
3171 *		resultfp			Pointer to result fileproc
3172 *						pointer area, or 0 if none
3173 *		resultpshm			Pointer to result POSIX
3174 *						shared memory pointer
3175 *						pointer area, or 0 if none
3176 *
3177 * Returns:	EBADF			The file descriptor is invalid
3178 *		EBADF			The file descriptor is not a POSIX
3179 *					shared memory area
3180 *		0			Success
3181 *
3182 * Implicit returns:
3183 *		*resultfp (modified)		Fileproc pointer
3184 *		*resultpshm (modified)		POSIX shared memory pointer
3185 *
3186 * Notes:	The second EBADF should probably be something else to make
3187 *		the error condition distinct.
3188 */
3189int
3190fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
3191	    struct pshmnode **resultpshm)
3192{
3193	struct filedesc *fdp = p->p_fd;
3194	struct fileproc *fp;
3195
3196	proc_fdlock_spin(p);
3197	if (fd < 0 || fd >= fdp->fd_nfiles ||
3198			(fp = fdp->fd_ofiles[fd]) == NULL ||
3199			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3200		proc_fdunlock(p);
3201		return (EBADF);
3202	}
3203	if (fp->f_type != DTYPE_PSXSHM) {
3204
3205		proc_fdunlock(p);
3206		return(EBADF);
3207	}
3208	fp->f_iocount++;
3209
3210	if (resultfp)
3211		*resultfp = fp;
3212	if (resultpshm)
3213		*resultpshm = (struct pshmnode *)fp->f_data;
3214	proc_fdunlock(p);
3215
3216	return (0);
3217}
3218
3219
3220/*
 * fp_getfpsem
3222 *
3223 * Description:	Get fileproc and POSIX semaphore pointer for a given fd from
3224 *		the per process open file table of the specified process
3225 *		and if successful, increment the f_iocount
3226 *
3227 * Parameters:	p				Process in which fd lives
3228 *		fd				fd to get information for
3229 *		resultfp			Pointer to result fileproc
3230 *						pointer area, or 0 if none
3231 *		resultpsem			Pointer to result POSIX
3232 *						semaphore pointer area, or
3233 *						0 if none
3234 *
3235 * Returns:	EBADF			The file descriptor is invalid
3236 *		EBADF			The file descriptor is not a POSIX
3237 *					semaphore
3238 *		0			Success
3239 *
3240 * Implicit returns:
3241 *		*resultfp (modified)		Fileproc pointer
3242 *		*resultpsem (modified)		POSIX semaphore pointer
3243 *
3244 * Notes:	The second EBADF should probably be something else to make
3245 *		the error condition distinct.
3246 *
3247 *		In order to support unnamed POSIX semaphores, the named
3248 *		POSIX semaphores will have to move out of the per-process
3249 *		open filetable, and into a global table that is shared with
3250 *		unnamed POSIX semaphores, since unnamed POSIX semaphores
3251 *		are typically used by declaring instances in shared memory,
3252 *		and there's no other way to do this without changing the
3253 *		underlying type, which would introduce binary compatibility
3254 *		issues.
3255 */
3256int
3257fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
3258	    struct psemnode **resultpsem)
3259{
3260	struct filedesc *fdp = p->p_fd;
3261	struct fileproc *fp;
3262
3263	proc_fdlock_spin(p);
3264	if (fd < 0 || fd >= fdp->fd_nfiles ||
3265			(fp = fdp->fd_ofiles[fd]) == NULL ||
3266			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3267		proc_fdunlock(p);
3268		return (EBADF);
3269	}
3270	if (fp->f_type != DTYPE_PSXSEM) {
3271		proc_fdunlock(p);
3272		return(EBADF);
3273	}
3274	fp->f_iocount++;
3275
3276	if (resultfp)
3277		*resultfp = fp;
3278	if (resultpsem)
3279		*resultpsem = (struct psemnode *)fp->f_data;
3280	proc_fdunlock(p);
3281
3282	return (0);
3283}
3284
3285
3286/*
3287 * fp_getfpipe
3288 *
3289 * Description:	Get fileproc and pipe pointer for a given fd from the
3290 *		per process open file table of the specified process
3291 *		and if successful, increment the f_iocount
3292 *
3293 * Parameters:	p				Process in which fd lives
3294 *		fd				fd to get information for
3295 *		resultfp			Pointer to result fileproc
3296 *						pointer area, or 0 if none
3297 *		resultpipe			Pointer to result pipe
3298 *						pointer area, or 0 if none
3299 *
3300 * Returns:	EBADF			The file descriptor is invalid
 *		EBADF			The file descriptor is not a pipe
3302 *		0			Success
3303 *
3304 * Implicit returns:
3305 *		*resultfp (modified)		Fileproc pointer
3306 *		*resultpipe (modified)		pipe pointer
3307 *
3308 * Notes:	The second EBADF should probably be something else to make
3309 *		the error condition distinct.
3310 */
3311int
3312fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
3313	    struct pipe **resultpipe)
3314{
3315	struct filedesc *fdp = p->p_fd;
3316	struct fileproc *fp;
3317
3318	proc_fdlock_spin(p);
3319	if (fd < 0 || fd >= fdp->fd_nfiles ||
3320			(fp = fdp->fd_ofiles[fd]) == NULL ||
3321			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3322		proc_fdunlock(p);
3323		return (EBADF);
3324	}
3325	if (fp->f_type != DTYPE_PIPE) {
3326		proc_fdunlock(p);
3327		return(EBADF);
3328	}
3329	fp->f_iocount++;
3330
3331	if (resultfp)
3332		*resultfp = fp;
3333	if (resultpipe)
3334		*resultpipe = (struct pipe *)fp->f_data;
3335	proc_fdunlock(p);
3336
3337	return (0);
3338}
3339
3340#if NETAT
3341#define DTYPE_ATALK -1		/* XXX This does not belong here */
3342
3343
3344/*
3345 * fp_getfatalk
3346 *
3347 * Description:	Get fileproc and atalk pointer for a given fd from the
3348 *		per process open file table of the specified process
3349 *		and if successful, increment the f_iocount
3350 *
3351 * Parameters:	p				Process in which fd lives
3352 *		fd				fd to get information for
3353 *		resultfp			Pointer to result fileproc
3354 *						pointer area, or 0 if none
3355 *		resultatalk			Pointer to result atalk
3356 *						pointer area, or 0 if none
3357 * Returns:	EBADF			The file descriptor is invalid
 *		EBADF			The file descriptor is not an atalk descriptor
3359 *		0			Success
3360 *
3361 * Implicit returns:
3362 *		*resultfp (modified)		Fileproc pointer
3363 *		*resultatalk (modified)		atalk pointer
3364 *
3365 * Notes:	The second EBADF should probably be something else to make
3366 *		the error condition distinct.
3367 *
3368 *		XXX This code is specific to AppleTalk protocol support, and
3369 *		XXX should be conditionally compiled
3370 */
3371int
3372fp_getfatalk(proc_t p, int fd, struct fileproc **resultfp,
3373	     struct atalk **resultatalk)
3374{
3375	struct filedesc *fdp = p->p_fd;
3376	struct fileproc *fp;
3377
3378	proc_fdlock_spin(p);
3379	if (fd < 0 || fd >= fdp->fd_nfiles ||
3380			(fp = fdp->fd_ofiles[fd]) == NULL ||
3381			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3382		proc_fdunlock(p);
3383		return (EBADF);
3384	}
3385	if (fp->f_type != (DTYPE_ATALK+1)) {
3386		proc_fdunlock(p);
3387		return(EBADF);
3388	}
3389	fp->f_iocount++;
3390
3391	if (resultfp)
3392		*resultfp = fp;
3393	if (resultatalk)
3394		*resultatalk = (struct atalk *)fp->f_data;
3395	proc_fdunlock(p);
3396
3397	return (0);
3398}
3399
3400#endif /* NETAT */
3401
3402/*
3403 * fp_lookup
3404 *
3405 * Description:	Get fileproc pointer for a given fd from the per process
3406 *		open file table of the specified process and if successful,
3407 *		increment the f_iocount
3408 *
3409 * Parameters:	p				Process in which fd lives
3410 *		fd				fd to get information for
3411 *		resultfp			Pointer to result fileproc
3412 *						pointer area, or 0 if none
3413 *		locked				!0 if the caller holds the
3414 *						proc_fdlock, 0 otherwise
3415 *
3416 * Returns:	0			Success
3417 *		EBADF			Bad file descriptor
3418 *
3419 * Implicit returns:
3420 *		*resultfp (modified)		Fileproc pointer
3421 *
3422 * Locks:	If the argument 'locked' is non-zero, then the caller is
3423 *		expected to have taken and held the proc_fdlock; if it is
 *		zero, then this routine internally takes and drops this lock.
3425 */
3426int
3427fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
3428{
3429	struct filedesc *fdp = p->p_fd;
3430	struct fileproc *fp;
3431
3432	if (!locked)
3433		proc_fdlock_spin(p);
3434	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
3435			(fp = fdp->fd_ofiles[fd]) == NULL ||
3436			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3437		if (!locked)
3438			proc_fdunlock(p);
3439		return (EBADF);
3440	}
3441	fp->f_iocount++;
3442
3443	if (resultfp)
3444		*resultfp = fp;
3445	if (!locked)
3446		proc_fdunlock(p);
3447
3448	return (0);
3449}
3450
3451
3452/*
3453 * fp_drop_written
3454 *
3455 * Description:	Set the FP_WRITTEN flag on the fileproc and drop the I/O
3456 *		reference previously taken by calling fp_lookup et. al.
3457 *
3458 * Parameters:	p				Process in which the fd lives
3459 *		fd				fd associated with the fileproc
3460 *		fp				fileproc on which to set the
3461 *						flag and drop the reference
3462 *
3463 * Returns:	0				Success
3464 *	fp_drop:EBADF				Bad file descriptor
3465 *
3466 * Locks:	This function internally takes and drops the proc_fdlock for
3467 *		the supplied process
3468 *
3469 * Notes:	The fileproc must correspond to the fd in the supplied proc
3470 */
3471int
3472fp_drop_written(proc_t p, int fd, struct fileproc *fp)
3473{
3474        int error;
3475
3476	proc_fdlock_spin(p);
3477
3478	fp->f_flags |= FP_WRITTEN;
3479
3480	error = fp_drop(p, fd, fp, 1);
3481
3482	proc_fdunlock(p);
3483
3484	return (error);
3485}
3486
3487
3488/*
3489 * fp_drop_event
3490 *
3491 * Description:	Set the FP_WAITEVENT flag on the fileproc and drop the I/O
3492 *		reference previously taken by calling fp_lookup et. al.
3493 *
3494 * Parameters:	p				Process in which the fd lives
3495 *		fd				fd associated with the fileproc
3496 *		fp				fileproc on which to set the
3497 *						flag and drop the reference
3498 *
3499 * Returns:	0				Success
3500 *	fp_drop:EBADF				Bad file descriptor
3501 *
3502 * Locks:	This function internally takes and drops the proc_fdlock for
3503 *		the supplied process
3504 *
3505 * Notes:	The fileproc must correspond to the fd in the supplied proc
3506 */
3507int
3508fp_drop_event(proc_t p, int fd, struct fileproc *fp)
3509{
3510        int error;
3511
3512	proc_fdlock_spin(p);
3513
3514	fp->f_flags |= FP_WAITEVENT;
3515
3516	error = fp_drop(p, fd, fp, 1);
3517
3518	proc_fdunlock(p);
3519
3520	return (error);
3521}
3522
3523
3524/*
3525 * fp_drop
3526 *
3527 * Description:	Drop the I/O reference previously taken by calling fp_lookup
3528 *		et. al.
3529 *
3530 * Parameters:	p				Process in which the fd lives
3531 *		fd				fd associated with the fileproc
3532 *		fp				fileproc on which to set the
3533 *						flag and drop the reference
3534 *		locked				flag to internally take and
3535 *						drop proc_fdlock if it is not
3536 *						already held by the caller
3537 *
3538 * Returns:	0				Success
3539 *		EBADF				Bad file descriptor
3540 *
 * Locks:	This function internally takes and drops the proc_fdlock for
 *		the supplied process if 'locked' is zero, and assumes that
 *		the caller already holds this lock if 'locked' is non-zero.
3544 *
3545 * Notes:	The fileproc must correspond to the fd in the supplied proc
3546 */
3547int
3548fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
3549{
3550	struct filedesc *fdp = p->p_fd;
3551	int	needwakeup = 0;
3552
3553	if (!locked)
3554		proc_fdlock_spin(p);
3555	 if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
3556			(fp = fdp->fd_ofiles[fd]) == NULL ||
3557			((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3558			 !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
3559		if (!locked)
3560			proc_fdunlock(p);
3561		return (EBADF);
3562	}
3563	fp->f_iocount--;
3564
3565	if (fp->f_iocount == 0) {
3566		if (fp->f_flags & FP_SELCONFLICT)
3567			fp->f_flags &= ~FP_SELCONFLICT;
3568
3569		if (p->p_fpdrainwait) {
3570			p->p_fpdrainwait = 0;
3571			needwakeup = 1;
3572		}
3573	}
3574	if (!locked)
3575		proc_fdunlock(p);
3576	if (needwakeup)
3577	        wakeup(&p->p_fpdrainwait);
3578
3579	return (0);
3580}
3581
3582
3583/*
3584 * file_vnode
3585 *
3586 * Description:	Given an fd, look it up in the current process's per process
3587 *		open file table, and return its internal vnode pointer.
3588 *
3589 * Parameters:	fd				fd to obtain vnode from
3590 *		vpp				pointer to vnode return area
3591 *
3592 * Returns:	0				Success
3593 *		EINVAL				The fd does not refer to a
3594 *						vnode fileproc entry
3595 *	fp_lookup:EBADF				Bad file descriptor
3596 *
3597 * Implicit returns:
3598 *		*vpp (modified)			Returned vnode pointer
3599 *
3600 * Locks:	This function internally takes and drops the proc_fdlock for
3601 *		the current process
3602 *
3603 * Notes:	If successful, this function increments the f_iocount on the
3604 *		fd's corresponding fileproc.
3605 *
3606 *		The fileproc referenced is not returned; because of this, care
3607 *		must be taken to not drop the last reference (e.g. by closing
3608 *		the file).  This is inherently unsafe, since the reference may
3609 *		not be recoverable from the vnode, if there is a subsequent
3610 *		close that destroys the associate fileproc.  The caller should
3611 *		therefore retain their own reference on the fileproc so that
3612 *		the f_iocount can be dropped subsequently.  Failure to do this
3613 *		can result in the returned pointer immediately becoming invalid
3614 *		following the call.
3615 *
3616 *		Use of this function is discouraged.
3617 */
3618int
3619file_vnode(int fd, struct vnode **vpp)
3620{
3621	proc_t p = current_proc();
3622	struct fileproc *fp;
3623	int error;
3624
3625	proc_fdlock_spin(p);
3626	if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3627		proc_fdunlock(p);
3628		return(error);
3629	}
3630	if (fp->f_type != DTYPE_VNODE) {
3631		fp_drop(p, fd, fp,1);
3632		proc_fdunlock(p);
3633		return(EINVAL);
3634	}
3635	if (vpp != NULL)
3636		*vpp = (struct vnode *)fp->f_data;
3637	proc_fdunlock(p);
3638
3639	return(0);
3640}
3641
3642
3643/*
3644 * file_vnode_withvid
3645 *
3646 * Description:	Given an fd, look it up in the current process's per process
3647 *		open file table, and return its internal vnode pointer.
3648 *
3649 * Parameters:	fd				fd to obtain vnode from
3650 *		vpp				pointer to vnode return area
3651 *		vidp				pointer to vid of the returned vnode
3652 *
3653 * Returns:	0				Success
3654 *		EINVAL				The fd does not refer to a
3655 *						vnode fileproc entry
3656 *	fp_lookup:EBADF				Bad file descriptor
3657 *
3658 * Implicit returns:
3659 *		*vpp (modified)			Returned vnode pointer
3660 *
3661 * Locks:	This function internally takes and drops the proc_fdlock for
3662 *		the current process
3663 *
3664 * Notes:	If successful, this function increments the f_iocount on the
3665 *		fd's corresponding fileproc.
3666 *
3667 *		The fileproc referenced is not returned; because of this, care
3668 *		must be taken to not drop the last reference (e.g. by closing
3669 *		the file).  This is inherently unsafe, since the reference may
3670 *		not be recoverable from the vnode, if there is a subsequent
3671 *		close that destroys the associate fileproc.  The caller should
3672 *		therefore retain their own reference on the fileproc so that
3673 *		the f_iocount can be dropped subsequently.  Failure to do this
3674 *		can result in the returned pointer immediately becoming invalid
3675 *		following the call.
3676 *
3677 *		Use of this function is discouraged.
3678 */
3679int
3680file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp)
3681{
3682	proc_t p = current_proc();
3683	struct fileproc *fp;
3684	vnode_t vp;
3685	int error;
3686
3687	proc_fdlock_spin(p);
3688	if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3689		proc_fdunlock(p);
3690		return(error);
3691	}
3692	if (fp->f_type != DTYPE_VNODE) {
3693		fp_drop(p, fd, fp,1);
3694		proc_fdunlock(p);
3695		return(EINVAL);
3696	}
3697	vp = (struct vnode *)fp->f_data;
3698	if (vpp != NULL)
3699		*vpp = vp;
3700
3701	if ((vidp != NULL) && (vp != NULLVP))
3702		*vidp = (uint32_t)vp->v_id;
3703
3704	proc_fdunlock(p);
3705
3706	return(0);
3707}
3708
3709
3710/*
3711 * file_socket
3712 *
3713 * Description:	Given an fd, look it up in the current process's per process
3714 *		open file table, and return its internal socket pointer.
3715 *
 * Parameters:	fd				fd to obtain socket from
3717 *		sp				pointer to socket return area
3718 *
3719 * Returns:	0				Success
3720 *		ENOTSOCK			Not a socket
3721 *		fp_lookup:EBADF			Bad file descriptor
3722 *
3723 * Implicit returns:
3724 *		*sp (modified)			Returned socket pointer
3725 *
3726 * Locks:	This function internally takes and drops the proc_fdlock for
3727 *		the current process
3728 *
3729 * Notes:	If successful, this function increments the f_iocount on the
3730 *		fd's corresponding fileproc.
3731 *
3732 *		The fileproc referenced is not returned; because of this, care
3733 *		must be taken to not drop the last reference (e.g. by closing
3734 *		the file).  This is inherently unsafe, since the reference may
3735 *		not be recoverable from the socket, if there is a subsequent
3736 *		close that destroys the associate fileproc.  The caller should
3737 *		therefore retain their own reference on the fileproc so that
3738 *		the f_iocount can be dropped subsequently.  Failure to do this
3739 *		can result in the returned pointer immediately becoming invalid
3740 *		following the call.
3741 *
3742 *		Use of this function is discouraged.
3743 */
3744int
3745file_socket(int fd, struct socket **sp)
3746{
3747	proc_t p = current_proc();
3748	struct fileproc *fp;
3749	int error;
3750
3751	proc_fdlock_spin(p);
3752	if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3753		proc_fdunlock(p);
3754		return(error);
3755	}
3756	if (fp->f_type != DTYPE_SOCKET) {
3757		fp_drop(p, fd, fp,1);
3758		proc_fdunlock(p);
3759		return(ENOTSOCK);
3760	}
3761	*sp = (struct socket *)fp->f_data;
3762	proc_fdunlock(p);
3763
3764	return(0);
3765}
3766
3767
3768/*
3769 * file_flags
3770 *
3771 * Description:	Given an fd, look it up in the current process's per process
3772 *		open file table, and return its fileproc's flags field.
3773 *
3774 * Parameters:	fd				fd whose flags are to be
3775 *						retrieved
3776 *		flags				pointer to flags data area
3777 *
 * Returns:	0				Success
 *		fp_lookup:EBADF			Bad file descriptor
3781 *
3782 * Implicit returns:
3783 *		*flags (modified)		Returned flags field
3784 *
3785 * Locks:	This function internally takes and drops the proc_fdlock for
3786 *		the current process
3787 *
3788 * Notes:	This function will internally increment and decrement the
3789 *		f_iocount of the fileproc as part of its operation.
3790 */
3791int
3792file_flags(int fd, int *flags)
3793{
3794
3795	proc_t p = current_proc();
3796	struct fileproc *fp;
3797	int error;
3798
3799	proc_fdlock_spin(p);
3800	if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3801		proc_fdunlock(p);
3802		return(error);
3803	}
3804	*flags = (int)fp->f_flag;
3805	fp_drop(p, fd, fp,1);
3806	proc_fdunlock(p);
3807
3808	return(0);
3809}
3810
3811
3812/*
3813 * file_drop
3814 *
3815 * Description:	Drop an iocount reference on an fd, and wake up any waiters
3816 *		for draining (i.e. blocked in fileproc_drain() called during
3817 *		the last attempt to close a file).
3818 *
3819 * Parameters:	fd				fd on which an ioreference is
3820 *						to be dropped
3821 *
3822 * Returns:	0				Success
3823 *		EBADF				Bad file descriptor
3824 *
 * Description:	Given an fd, look it up in the current process's per process
 *		open file table, and drop its fileproc's f_iocount by one
3827 *
3828 * Notes:	This is intended as a corresponding operation to the functions
3829 *		file_vnode() and file_socket() operations.
3830 *
3831 *		Technically, the close reference is supposed to be protected
3832 *		by a fileproc_drain(), however, a drain will only block if
3833 *		the fd refers to a character device, and that device has had
3834 *		preparefileread() called on it.  If it refers to something
3835 *		other than a character device, then the drain will occur and
3836 *		block each close attempt, rather than merely the last close.
3837 *
3838 *		Since it's possible for an fd that refers to a character
3839 *		device to have an intermediate close followed by an open to
3840 *		cause a different file to correspond to that descriptor,
3841 *		unless there was a cautionary reference taken on the fileproc,
3842 *		this is an inherently unsafe function.  This happens in the
3843 *		case where multiple fd's in a process refer to the same
3844 *		character device (e.g. stdin/out/err pointing to a tty, etc.).
3845 *
3846 *		Use of this function is discouraged.
3847 */
3848int
3849file_drop(int fd)
3850{
3851	struct fileproc *fp;
3852	proc_t p = current_proc();
3853	int	needwakeup = 0;
3854
3855	proc_fdlock_spin(p);
3856	if (fd < 0 || fd >= p->p_fd->fd_nfiles ||
3857			(fp = p->p_fd->fd_ofiles[fd]) == NULL ||
3858			((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) &&
3859			 !(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) {
3860		proc_fdunlock(p);
3861		return (EBADF);
3862	}
3863	fp->f_iocount --;
3864
3865	if (fp->f_iocount == 0) {
3866		if (fp->f_flags & FP_SELCONFLICT)
3867			fp->f_flags &= ~FP_SELCONFLICT;
3868
3869		if (p->p_fpdrainwait) {
3870			p->p_fpdrainwait = 0;
3871			needwakeup = 1;
3872		}
3873	}
3874	proc_fdunlock(p);
3875
3876	if (needwakeup)
3877	        wakeup(&p->p_fpdrainwait);
3878	return(0);
3879}
3880
3881
3882/*
3883 * falloc
3884 *
3885 * Description:	Allocate an entry in the per process open file table and
3886 *		return the corresponding fileproc and fd.
3887 *
3888 * Parameters:	p				The process in whose open file
3889 *						table the fd is to be allocated
3890 *		resultfp			Pointer to fileproc pointer
3891 *						return area
3892 *		resultfd			Pointer to fd return area
3893 *		ctx				VFS context
3894 *
3895 * Returns:	0				Success
 *	falloc_locked:ENFILE			Too many open files in system
 *	falloc_locked:EMFILE			Too many open files in process
 *	falloc_locked:ENOMEM			M_FILEPROC or M_FILEGLOB zone
3899 *						exhausted
3900 *
3901 * Implicit returns:
 *		*resultfp (modified)		Returned fileproc pointer
3903 *		*resultfd (modified)		Returned fd
3904 *
3905 * Locks:	This function takes and drops the proc_fdlock; if this lock
3906 *		is already held, use falloc_locked() instead.
3907 *
3908 * Notes:	This function takes separate process and context arguments
3909 *		solely to support kern_exec.c; otherwise, it would take
3910 *		neither, and expect falloc_locked() to use the
3911 *		vfs_context_current() routine internally.
3912 */
3913int
3914falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
3915{
3916	int error;
3917
3918	proc_fdlock(p);
3919	error = falloc_locked(p, resultfp, resultfd, ctx, 1);
3920	proc_fdunlock(p);
3921
3922	return(error);
3923}
3924
3925
3926/*
3927 * falloc_locked
3928 *
3929 * Create a new open file structure and allocate
3930 * a file descriptor for the process that refers to it.
3931 *
3932 * Returns:	0			Success
3933 *
3934 * Description:	Allocate an entry in the per process open file table and
3935 *		return the corresponding fileproc and fd.
3936 *
3937 * Parameters:	p				The process in whose open file
3938 *						table the fd is to be allocated
3939 *		resultfp			Pointer to fileproc pointer
3940 *						return area
3941 *		resultfd			Pointer to fd return area
3942 *		ctx				VFS context
3943 *		locked				Flag to indicate whether the
3944 *						caller holds proc_fdlock
3945 *
3946 * Returns:	0				Success
3947 *		ENFILE				Too many open files in system
3948 *		fdalloc:EMFILE			Too many open files in process
3949 *		ENOMEM				M_FILEPROC or M_FILEGLOB zone
3950 *						exhausted
3951 *	fdalloc:ENOMEM
3952 *
3953 * Implicit returns:
 *		*resultfp (modified)		Returned fileproc pointer
3955 *		*resultfd (modified)		Returned fd
3956 *
3957 * Locks:	If the parameter 'locked' is zero, this function takes and
3958 *		drops the proc_fdlock; if non-zero, the caller must hold the
3959 *		lock.
3960 *
3961 * Notes:	If you intend to use a non-zero 'locked' parameter, use the
3962 *		utility function falloc() instead.
3963 *
3964 *		This function takes separate process and context arguments
3965 *		solely to support kern_exec.c; otherwise, it would take
3966 *		neither, and use the vfs_context_current() routine internally.
3967 */
3968int
3969falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
3970	      vfs_context_t ctx, int locked)
3971{
3972	struct fileproc *fp;
3973	struct fileglob *fg;
3974	int error, nfd;
3975
3976	if (!locked)
3977		proc_fdlock(p);
3978	if ( (error = fdalloc(p, 0, &nfd)) ) {
3979		if (!locked)
3980			proc_fdunlock(p);
3981		return (error);
3982	}
3983	if (nfiles >= maxfiles) {
3984		if (!locked)
3985			proc_fdunlock(p);
3986		tablefull("file");
3987		return (ENFILE);
3988	}
3989#if CONFIG_MACF
3990	error = mac_file_check_create(proc_ucred(p));
3991	if (error) {
3992		if (!locked)
3993			proc_fdunlock(p);
3994		return (error);
3995	}
3996#endif
3997
3998	/*
3999	 * Allocate a new file descriptor.
4000	 * If the process has file descriptor zero open, add to the list
4001	 * of open files at that point, otherwise put it at the front of
4002	 * the list of open files.
4003	 */
4004	proc_fdunlock(p);
4005
4006	MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
4007	if (fp == NULL) {
4008		if (locked)
4009			proc_fdlock(p);
4010		return (ENOMEM);
4011	}
4012	MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK);
4013	if (fg == NULL) {
4014		FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4015		if (locked)
4016			proc_fdlock(p);
4017		return (ENOMEM);
4018	}
4019	bzero(fp, sizeof(struct fileproc));
4020	bzero(fg, sizeof(struct fileglob));
4021	lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4022
4023	fp->f_iocount = 1;
4024	fg->fg_count = 1;
4025	fp->f_fglob = fg;
4026#if CONFIG_MACF
4027	mac_file_label_init(fg);
4028#endif
4029
4030	kauth_cred_ref(ctx->vc_ucred);
4031
4032	proc_fdlock(p);
4033
4034	fp->f_cred = ctx->vc_ucred;
4035
4036#if CONFIG_MACF
4037	mac_file_label_associate(fp->f_cred, fg);
4038#endif
4039
4040	OSAddAtomic(1, &nfiles);
4041
4042	p->p_fd->fd_ofiles[nfd] = fp;
4043
4044	if (!locked)
4045		proc_fdunlock(p);
4046
4047	if (resultfp)
4048		*resultfp = fp;
4049	if (resultfd)
4050		*resultfd = nfd;
4051
4052	return (0);
4053}
4054
4055
4056/*
4057 * fg_free
4058 *
4059 * Description:	Free a file structure; drop the global open file count, and
4060 *		drop the credential reference, if the fileglob has one, and
4061 *		destroy the instance mutex before freeing
4062 *
4063 * Parameters:	fg				Pointer to fileglob to be
4064 *						freed
4065 *
4066 * Returns:	void
4067 */
4068void
4069fg_free(struct fileglob *fg)
4070{
4071	OSAddAtomic(-1, &nfiles);
4072
4073	if (IS_VALID_CRED(fg->fg_cred)) {
4074		kauth_cred_unref(&fg->fg_cred);
4075	}
4076	lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
4077
4078#if CONFIG_MACF
4079	mac_file_label_destroy(fg);
4080#endif
4081	FREE_ZONE(fg, sizeof *fg, M_FILEGLOB);
4082}
4083
4084
4085/*
4086 * fdexec
4087 *
4088 * Description:	Perform close-on-exec processing for all files in a process
4089 *		that are either marked as close-on-exec, or which were in the
4090 *		process of being opened at the time of the execve
4091 *
 *		Also handles the case (via posix_spawn()) where -all-
 *		files except those marked with "inherit" are treated as
 *		close-on-exec.
4095 *
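 * Parameters:	p				Pointer to process calling
 *						execve
 *		flags				Spawn flags; when
 *						POSIX_SPAWN_CLOEXEC_DEFAULT
 *						is set, every open file not
 *						marked "inherit" is treated
 *						as close-on-exec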
4098 *
4099 * Returns:	void
4100 *
4101 * Locks:	This function internally takes and drops proc_fdlock()
4102 *
4103 * Notes:	This function drops and retakes the kernel funnel; this is
4104 *		inherently unsafe, since another thread may have the
4105 *		proc_fdlock.
4106 *
4107 * XXX:		We should likely reverse the lock and funnel drop/acquire
4108 *		order to avoid the small race window; it's also possible that
4109 *		if the program doing the exec has an outstanding listen socket
4110 *		and a network connection is completed asynchronously that we
4111 *		will end up with a "ghost" socket reference in the new process.
4112 *
4113 *		This needs reworking to make it safe to remove the funnel from
4114 *		the execve and posix_spawn system calls.
4115 */
4116void
4117fdexec(proc_t p, short flags)
4118{
4119	struct filedesc *fdp = p->p_fd;
4120	int i;
4121	boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4122
4123	proc_fdlock(p);
4124	for (i = fdp->fd_lastfile; i >= 0; i--) {
4125
4126		struct fileproc *fp = fdp->fd_ofiles[i];
4127		char *flagp = &fdp->fd_ofileflags[i];
4128
4129		if (fp && cloexec_default) {
4130			/*
4131			 * Reverse the usual semantics of file descriptor
4132			 * inheritance - all of them should be closed
4133			 * except files marked explicitly as "inherit" and
4134			 * not marked close-on-exec.
4135			 */
4136			if ((*flagp & (UF_EXCLOSE|UF_INHERIT)) != UF_INHERIT)
4137				*flagp |= UF_EXCLOSE;
4138			*flagp &= ~UF_INHERIT;
4139		}
4140
4141		if (
4142		    ((*flagp & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE)
4143#if CONFIG_MACF
4144		    || (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
4145#endif
4146		) {
4147                        if (i < fdp->fd_knlistsize)
4148                                knote_fdclose(p, i);
4149			procfdtbl_clearfd(p, i);
4150			if (i == fdp->fd_lastfile && i > 0)
4151				fdp->fd_lastfile--;
4152			if (i < fdp->fd_freefile)
4153				fdp->fd_freefile = i;
4154
4155			/*
4156			 * Wait for any third party viewers (e.g., lsof)
4157			 * to release their references to this fileproc.
4158			 */
4159			while (fp->f_iocount > 0) {
4160				p->p_fpdrainwait = 1;
4161				msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO,
4162				    "fpdrain", NULL);
4163			}
4164
4165			closef_locked(fp, fp->f_fglob, p);
4166
4167			FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4168		}
4169	}
4170	proc_fdunlock(p);
4171}
4172
4173
4174/*
4175 * fdcopy
4176 *
4177 * Description:	Copy a filedesc structure.  This is normally used as part of
4178 *		forkproc() when forking a new process, to copy the per process
4179 *		open file table over to the new process.
4180 *
4181 * Parameters:	p				Process whose open file table
4182 *						is to be copied (parent)
 *		uth_cdir			Per thread current working
 *						directory, or NULL
4185 *
4186 * Returns:	NULL				Copy failed
4187 *		!NULL				Pointer to new struct filedesc
4188 *
4189 * Locks:	This function internally takes and drops proc_fdlock()
4190 *
4191 * Notes:	Files are copied directly, ignoring the new resource limits
4192 *		for the process that's being copied into.  Since the descriptor
4193 *		references are just additional references, this does not count
4194 *		against the number of open files on the system.
4195 *
4196 *		The struct filedesc includes the current working directory,
4197 *		and the current root directory, if the process is chroot'ed.
4198 *
4199 *		If the exec was called by a thread using a per thread current
4200 *		working directory, we inherit the working directory from the
4201 *		thread making the call, rather than from the process.
4202 *
4203 *		In the case of a failure to obtain a reference, for most cases,
4204 *		the file entry will be silently dropped.  There's an exception
 *		for the case of a chroot dir, since a failure to obtain a
4206 *		reference there would constitute an "escape" from the chroot
4207 *		environment, which must not be allowed.  In that case, we will
4208 *		deny the execve() operation, rather than allowing the escape.
4209 */
4210struct filedesc *
4211fdcopy(proc_t p, vnode_t uth_cdir)
4212{
4213	struct filedesc *newfdp, *fdp = p->p_fd;
4214	int i;
4215	struct fileproc *ofp, *fp;
4216	vnode_t	v_dir;
4217
4218	MALLOC_ZONE(newfdp, struct filedesc *,
4219			sizeof(*newfdp), M_FILEDESC, M_WAITOK);
4220	if (newfdp == NULL)
4221		return(NULL);
4222
4223	proc_fdlock(p);
4224
4225	/*
4226	 * the FD_CHROOT flag will be inherited via this copy
4227	 */
4228	(void) memcpy(newfdp, fdp, sizeof(*newfdp));
4229
4230	/*
4231	 * If we are running with per-thread current working directories,
4232	 * inherit the new current working directory from the current thread
4233	 * instead, before we take our references.
4234	 */
4235	if (uth_cdir != NULLVP)
4236		newfdp->fd_cdir = uth_cdir;
4237
4238	/*
4239	 * For both fd_cdir and fd_rdir make sure we get
4240	 * a valid reference... if we can't, than set
4241	 * set the pointer(s) to NULL in the child... this
4242	 * will keep us from using a non-referenced vp
4243	 * and allows us to do the vnode_rele only on
4244	 * a properly referenced vp
4245	 */
4246	if ( (v_dir = newfdp->fd_cdir) ) {
4247	        if (vnode_getwithref(v_dir) == 0) {
4248		        if ( (vnode_ref(v_dir)) )
4249			        newfdp->fd_cdir = NULL;
4250			vnode_put(v_dir);
4251		} else
4252		        newfdp->fd_cdir = NULL;
4253	}
4254	if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4255	        /*
4256		 * we couldn't get a new reference on
4257		 * the current working directory being
4258		 * inherited... we might as well drop
4259		 * our reference from the parent also
4260		 * since the vnode has gone DEAD making
4261		 * it useless... by dropping it we'll
4262		 * be that much closer to recycling it
4263		 */
4264	        vnode_rele(fdp->fd_cdir);
4265		fdp->fd_cdir = NULL;
4266	}
4267
4268	if ( (v_dir = newfdp->fd_rdir) ) {
4269		if (vnode_getwithref(v_dir) == 0) {
4270			if ( (vnode_ref(v_dir)) )
4271			        newfdp->fd_rdir = NULL;
4272			vnode_put(v_dir);
4273		} else {
4274		        newfdp->fd_rdir = NULL;
4275		}
4276	}
4277	/* Coming from a chroot environment and unable to get a reference... */
4278	if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4279	        /*
4280		 * We couldn't get a new reference on
4281		 * the chroot directory being
4282		 * inherited... this is fatal, since
4283		 * otherwise it would constitute an
4284		 * escape from a chroot environment by
4285		 * the new process.
4286		 */
4287		if (newfdp->fd_cdir)
4288		        vnode_rele(newfdp->fd_cdir);
4289		FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
4290		return(NULL);
4291	}
4292	newfdp->fd_refcnt = 1;
4293
4294	/*
4295	 * If the number of open files fits in the internal arrays
4296	 * of the open file structure, use them, otherwise allocate
4297	 * additional memory for the number of descriptors currently
4298	 * in use.
4299	 */
4300	if (newfdp->fd_lastfile < NDFILE)
4301		i = NDFILE;
4302	else {
4303		/*
4304		 * Compute the smallest multiple of NDEXTENT needed
4305		 * for the file descriptors currently in use,
4306		 * allowing the table to shrink.
4307		 */
4308		i = newfdp->fd_nfiles;
4309		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
4310			i /= 2;
4311	}
4312	proc_fdunlock(p);
4313
4314	MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
4315				i * OFILESIZE, M_OFILETABL, M_WAITOK);
4316	if (newfdp->fd_ofiles == NULL) {
4317		if (newfdp->fd_cdir)
4318		        vnode_rele(newfdp->fd_cdir);
4319		if (newfdp->fd_rdir)
4320			vnode_rele(newfdp->fd_rdir);
4321
4322		FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
4323		return(NULL);
4324	}
4325	(void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4326	proc_fdlock(p);
4327
4328	newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4329	newfdp->fd_nfiles = i;
4330
4331	if (fdp->fd_nfiles > 0) {
4332		struct fileproc **fpp;
4333		char *flags;
4334
4335		(void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4336					(newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4337		(void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4338					(newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4339
4340		/*
4341		 * kq descriptors cannot be copied.
4342		 */
4343		if (newfdp->fd_knlistsize != -1) {
4344			fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4345			for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
4346				if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
4347					*fpp = NULL;
4348					newfdp->fd_ofileflags[i] = 0;
4349					if (i < newfdp->fd_freefile)
4350						newfdp->fd_freefile = i;
4351				}
4352				if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
4353					newfdp->fd_lastfile--;
4354			}
4355			newfdp->fd_knlist = NULL;
4356			newfdp->fd_knlistsize = -1;
4357			newfdp->fd_knhash = NULL;
4358			newfdp->fd_knhashmask = 0;
4359		}
4360		fpp = newfdp->fd_ofiles;
4361		flags = newfdp->fd_ofileflags;
4362
4363		for (i = newfdp->fd_lastfile + 1; --i >= 0; fpp++, flags++)
4364			if ((ofp = *fpp) != NULL && !(*flags & UF_RESERVED)) {
4365				MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
4366				if (fp == NULL) {
4367					/*
4368					 * XXX no room to copy, unable to
4369					 * XXX safely unwind state at present
4370					 */
4371					*fpp = NULL;
4372				} else {
4373					bzero(fp, sizeof(struct fileproc));
4374					fp->f_flags = ofp->f_flags;
4375					//fp->f_iocount = ofp->f_iocount;
4376					fp->f_iocount = 0;
4377					fp->f_fglob = ofp->f_fglob;
4378					(void)fg_ref(fp);
4379					*fpp = fp;
4380				}
4381			} else {
4382				if (i < newfdp->fd_freefile)
4383					newfdp->fd_freefile = i;
4384				*fpp = NULL;
4385				*flags = 0;
4386			}
4387	}
4388
4389	proc_fdunlock(p);
4390	return (newfdp);
4391}
4392
4393
4394/*
4395 * fdfree
4396 *
4397 * Description:	Release a filedesc (per process open file table) structure;
4398 *		this is done on process exit(), or from forkproc_free() if
4399 *		the fork fails for some reason subsequent to a successful
4400 *		call to fdcopy()
4401 *
4402 * Parameters:	p				Pointer to process going away
4403 *
4404 * Returns:	void
4405 *
4406 * Locks:	This function internally takes and drops proc_fdlock()
4407 */
4408void
4409fdfree(proc_t p)
4410{
4411	struct filedesc *fdp;
4412	struct fileproc *fp;
4413	int i;
4414
4415	proc_fdlock(p);
4416
4417	/* Certain daemons might not have file descriptors */
4418	fdp = p->p_fd;
4419
4420	if ((fdp == NULL) || (--fdp->fd_refcnt > 0)) {
4421	        proc_fdunlock(p);
4422		return;
4423	}
4424	if (fdp->fd_refcnt == 0xffff)
4425	        panic("fdfree: bad fd_refcnt");
4426
4427	/* Last reference: the structure can't change out from under us */
4428
4429	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
4430	        for (i = fdp->fd_lastfile; i >= 0; i--) {
4431			if ((fp = fdp->fd_ofiles[i]) != NULL) {
4432
4433			  if (fdp->fd_ofileflags[i] & UF_RESERVED)
4434			    	panic("fdfree: found fp with UF_RESERVED");
4435
4436				/* closef drops the iocount ... */
4437				if ((fp->f_flags & FP_INCHRREAD) != 0)
4438					fp->f_iocount++;
4439				procfdtbl_reservefd(p, i);
4440
4441				if (i < fdp->fd_knlistsize)
4442					knote_fdclose(p, i);
4443				if (fp->f_flags & FP_WAITEVENT)
4444					(void)waitevent_close(p, fp);
4445				(void) closef_locked(fp, fp->f_fglob, p);
4446				FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4447			}
4448		}
4449		FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
4450		fdp->fd_ofiles = NULL;
4451		fdp->fd_nfiles = 0;
4452	}
4453
4454	proc_fdunlock(p);
4455
4456	if (fdp->fd_cdir)
4457	        vnode_rele(fdp->fd_cdir);
4458	if (fdp->fd_rdir)
4459		vnode_rele(fdp->fd_rdir);
4460
4461	proc_fdlock_spin(p);
4462	p->p_fd = NULL;
4463	proc_fdunlock(p);
4464
4465	if (fdp->fd_knlist)
4466		FREE(fdp->fd_knlist, M_KQUEUE);
4467	if (fdp->fd_knhash)
4468		FREE(fdp->fd_knhash, M_KQUEUE);
4469
4470	FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
4471}
4472
4473
4474/*
4475 * closef_finish
4476 *
4477 * Description:	Called on last open instance for a fileglob for a file being
4478 *		closed.
4479 *
 * Parameters:	fp			Pointer to fileproc for fd
 *		fg			Pointer to fileglob for fd
 *		p			Pointer to proc structure
 *		ctx			VFS context handed to the per-fileops
 *					close function
4483 *
4484 * Returns:	0			Success
4485 *	<fo_close>:???			Anything returnable by a per-fileops
4486 *					close function
4487 *
 * Note:	fp can only be non-NULL if p is also non-NULL.  If p is NULL,
 *		then fg must either be locked (FHASLOCK) or must not have a
 *		type of DTYPE_VNODE.
4491 *
4492 *		On return, the fg is freed.
4493 *
4494 *		This function may block draining output to a character
4495 *		device on last close of that device.
4496 */
4497static int
4498closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx)
4499{
4500	int error;
4501
4502
4503	/* fg_ops completed initialization? */
4504	if (fg->fg_ops)
4505		error = fo_close(fg, ctx);
4506	else
4507		error = 0;
4508
4509	/* if fp is non-NULL, drain it out */
4510	if (((fp != (struct fileproc *)0) && ((fp->f_flags & FP_INCHRREAD) != 0))) {
4511	        proc_fdlock_spin(p);
4512		if ( ((fp->f_flags & FP_INCHRREAD) != 0) ) {
4513		        fileproc_drain(p, fp);
4514		}
4515		proc_fdunlock(p);
4516	}
4517	fg_free(fg);
4518
4519	return (error);
4520}
4521
4522/*
4523 * closef_locked
4524 *
4525 * Description:	Internal form of closef; called with proc_fdlock held
4526 *
4527 * Parameters:	fp			Pointer to fileproc for fd
4528 *		fg			Pointer to fileglob for fd
4529 *		p			Pointer to proc structure
4530 *
4531 * Returns:	0			Success
4532 *	closef_finish:??? 		Anything returnable by a per-fileops
4533 *					close function
4534 *
4535 * Note:	Decrements reference count on file structure; if this was the
4536 *		last reference, then closef_finish() is called
4537 *
4538 *		p and fp are allowed to  be NULL when closing a file that was
4539 *		being passed in a message (but only if we are called when this
4540 *		is NOT the last reference).
4541 */
int
closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
{
	struct vnode *vp;
	struct flock lf;
	struct vfs_context context;
	int error;

	/* No fileglob means there is nothing to release */
	if (fg == NULL) {
		return (0);
	}

	/*
	 * Set up context with cred stashed in fg.  A thread is recorded
	 * only when p is the calling process; otherwise the context
	 * carries a NULL thread.
	 */
	if (p == current_proc())
		context.vc_thread = current_thread();
	else
		context.vc_thread = NULL;
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && (p->p_ladvflag & P_LADVLOCK) && fg->fg_type == DTYPE_VNODE) {
		/* The fd lock is not held across the call into the filesystem */
		proc_fdunlock(p);

		/* Unlock request covering offset 0, length 0 from SEEK_SET */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = (struct vnode *)fg->fg_data;

		/*
		 * Best effort: a failure to get the vnode skips the unlock,
		 * and the result of the unlock itself is discarded.
		 */
		if ( (error = vnode_getwithref(vp)) == 0 ) {
			(void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context);
			(void)vnode_put(vp);
		}
		proc_fdlock(p);
	}

	/* Drop the caller's reference under the fileglob lock */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_count--;

	/* Other references remain; the fileglob stays alive */
	if (fg->fg_count > 0) {
		lck_mtx_unlock(&fg->fg_lock);
		return (0);
	}
#if DIAGNOSTIC
	/* Not positive and not zero: the count went negative */
	if (fg->fg_count != 0)
		panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
#endif

	/* Carry the fileproc's FP_WRITTEN state over to the fileglob */
	if (fp && (fp->f_flags & FP_WRITTEN))
	        fg->fg_flag |= FWASWRITTEN;

	/* Last reference: flag the fileglob with FG_TERM before finishing */
	fg->fg_lflags |= FG_TERM;
	lck_mtx_unlock(&fg->fg_lock);

	/*
	 * closef_finish() is called without the fd lock; the lock is
	 * retaken afterwards so the caller still holds it on return.
	 */
	if (p)
		proc_fdunlock(p);
	error = closef_finish(fp, fg, p, &context);
	if (p)
		proc_fdlock(p);

	return(error);
}
4610
4611
4612/*
4613 * fileproc_drain
4614 *
4615 * Description:	Drain out pending I/O operations
4616 *
4617 * Parameters:	p				Process closing this file
4618 *		fp				fileproc struct for the open
4619 *						instance on the file
4620 *
4621 * Returns:	void
4622 *
4623 * Locks:	Assumes the caller holds the proc_fdlock
4624 *
4625 * Notes:	For character devices, this occurs on the last close of the
4626 *		device; for all other file descriptors, this occurs on each
4627 *		close to prevent fd's from being closed out from under
4628 *		operations currently in progress and blocked
4629 *
4630 * See Also: 	file_vnode(), file_socket(), file_drop(), and the cautions
4631 *		regarding their use and interaction with this function.
4632 */
void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct vfs_context context;

	/* Context for the fo_drain callout: p's thread, the fileglob's cred */
	context.vc_thread = proc_thread(p);	/* XXX */
	context.vc_ucred = fp->f_fglob->fg_cred;

	fp->f_iocount-- ; /* (the one the close holds) */

	/* Loop until no I/O reference other than the close's remains */
	while (fp->f_iocount) {

		/*
		 * NOTE(review): presumably the fd lock may be held in spin
		 * mode on entry and must be a full mutex before calling out
		 * and sleeping -- confirm against lck_mtx_convert_spin().
		 */
	        lck_mtx_convert_spin(&p->p_fdmlock);

		/* Let the file type kick its own blocked operations, if it can */
		if (fp->f_fglob->fg_ops->fo_drain) {
			(*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
		}
		/* Interrupt waiters on the wait queue recorded in f_waddr */
		if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
			if (wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
				panic("bad wait queue for wait_queue_wakeup_all %p", fp->f_waddr);
		}
		/* Interrupt waiters on the shared select conflict queue */
		if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			if (wait_queue_wakeup_all(&select_conflict_queue, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
				panic("bad select_conflict_queue");
		}
		/*
		 * Announce that a drainer is waiting, then sleep on that flag
		 * with the fd mutex passed to msleep().  The waker is not
		 * visible here; presumably whoever drops an iocount -- confirm.
		 */
		p->p_fpdrainwait = 1;

		msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);

	}
#if DIAGNOSTIC
	if ((fp->f_flags & FP_INSELECT) != 0)
		panic("FP_INSELECT set on drained fp");
#endif
	/* The drained fileproc no longer takes part in a select conflict */
	if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT)
		fp->f_flags &= ~FP_SELCONFLICT;
}
4670
4671
4672/*
4673 * fp_free
4674 *
4675 * Description:	Release the fd and free the fileproc associated with the fd
4676 *		in the per process open file table of the specified process;
4677 *		these values must correspond.
4678 *
4679 * Parameters:	p				Process containing fd
4680 *		fd				fd to be released
4681 *		fp				fileproc to be freed
4682 *
4683 * Returns:	0				Success
4684 *
4685 * Notes:	XXX function should be void - no one interprets the returns
4686 *		XXX code
4687 */
4688int
4689fp_free(proc_t p, int fd, struct fileproc * fp)
4690{
4691        proc_fdlock_spin(p);
4692	fdrelse(p, fd);
4693        proc_fdunlock(p);
4694
4695	fg_free(fp->f_fglob);
4696	FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4697	return(0);
4698}
4699
4700
4701/*
4702 * flock
4703 *
4704 * Description:	Apply an advisory lock on a file descriptor.
4705 *
4706 * Parameters:	p				Process making request
4707 *		uap->fd				fd on which the lock is to be
4708 *						attempted
4709 *		uap->how			(Un)Lock bits, including type
4710 *		retval				Pointer to the call return area
4711 *
4712 * Returns:	0				Success
4713 *	fp_getfvp:EBADF				Bad file descriptor
4714 *	fp_getfvp:ENOTSUP			fd does not refer to a vnode
4715 *	vnode_getwithref:???
4716 *	VNOP_ADVLOCK:???
4717 *
 * Implicit returns:
 *		None; retval is unused by this call
4720 *
4721 * Notes:	Just attempt to get a record lock of the requested type on
4722 *		the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
4723 */
4724int
4725flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
4726{
4727	int fd = uap->fd;
4728	int how = uap->how;
4729	struct fileproc *fp;
4730	struct vnode *vp;
4731	struct flock lf;
4732	vfs_context_t ctx = vfs_context_current();
4733	int error=0;
4734
4735	AUDIT_ARG(fd, uap->fd);
4736	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
4737		return(error);
4738	}
4739	if ( (error = vnode_getwithref(vp)) ) {
4740		goto out1;
4741	}
4742	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4743
4744	lf.l_whence = SEEK_SET;
4745	lf.l_start = 0;
4746	lf.l_len = 0;
4747	if (how & LOCK_UN) {
4748		lf.l_type = F_UNLCK;
4749		fp->f_flag &= ~FHASLOCK;
4750		error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx);
4751		goto out;
4752	}
4753	if (how & LOCK_EX)
4754		lf.l_type = F_WRLCK;
4755	else if (how & LOCK_SH)
4756		lf.l_type = F_RDLCK;
4757	else {
4758	        error = EBADF;
4759		goto out;
4760	}
4761#if CONFIG_MACF
4762	error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
4763	if (error)
4764		goto out;
4765#endif
4766	fp->f_flag |= FHASLOCK;
4767	if (how & LOCK_NB) {
4768		error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, ctx);
4769		goto out;
4770	}
4771	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, ctx);
4772out:
4773	(void)vnode_put(vp);
4774out1:
4775	fp_drop(p, fd, fp, 0);
4776	return(error);
4777
4778}
4779
4780/*
4781 * fileport_makeport
4782 *
4783 * Description: Obtain a Mach send right for a given file descriptor.
4784 *
4785 * Parameters:	p		Process calling fileport
4786 * 		uap->fd		The fd to reference
4787 * 		uap->portnamep  User address at which to place port name.
4788 *
4789 * Returns:	0		Success.
4790 *     		EBADF		Bad file descriptor.
4791 *     		EINVAL		File descriptor had type that cannot be sent, misc. other errors.
4792 *     		EFAULT		Address at which to store port name is not valid.
4793 *     		EAGAIN		Resource shortage.
4794 *
4795 * Implicit returns:
4796 *		On success, name of send right is stored at user-specified address.
4797 */
4798int
4799fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
4800    __unused int *retval)
4801{
4802	int err;
4803	int fd = uap->fd;
4804	user_addr_t user_portaddr = uap->portnamep;
4805	struct fileproc *fp = FILEPROC_NULL;
4806	struct fileglob *fg = NULL;
4807	ipc_port_t fileport;
4808	mach_port_name_t name = MACH_PORT_NULL;
4809
4810	err = fp_lookup(p, fd, &fp, 0);
4811	if (err != 0) {
4812		goto out;
4813	}
4814
4815	if (!filetype_issendable(fp->f_type)) {
4816		err = EINVAL;
4817		goto out;
4818	}
4819
4820	/* Dropped when port is deallocated */
4821	fg = fp->f_fglob;
4822	fg_ref(fp);
4823
4824	/* Allocate and initialize a port */
4825	fileport = fileport_alloc(fg);
4826	if (fileport == IPC_PORT_NULL) {
4827		err = EAGAIN;
4828		fg_drop(fp);
4829		goto out;
4830	}
4831
4832	/* Add an entry.  Deallocates port on failure. */
4833	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
4834	if (!MACH_PORT_VALID(name)) {
4835		err = EINVAL;
4836		goto out;
4837	}
4838
4839	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
4840	if (err != 0) {
4841		goto out;
4842	}
4843
4844	/* Tag the fileglob for debugging purposes */
4845	lck_mtx_lock_spin(&fg->fg_lock);
4846	fg->fg_lflags |= FG_PORTMADE;
4847	lck_mtx_unlock(&fg->fg_lock);
4848
4849	fp_drop(p, fd, fp, 0);
4850
4851	return 0;
4852
4853out:
4854	if (MACH_PORT_VALID(name)) {
4855		/* Don't care if another thread races us to deallocate the entry */
4856		(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
4857	}
4858
4859	if (fp != FILEPROC_NULL) {
4860		fp_drop(p, fd, fp, 0);
4861	}
4862
4863	return err;
4864}
4865
4866void
4867fileport_releasefg(struct fileglob *fg)
4868{
4869	(void)closef_locked(NULL, fg, PROC_NULL);
4870
4871	return;
4872}
4873
4874
4875/*
4876 * fileport_makefd
4877 *
4878 * Description: Obtain the file descriptor for a given Mach send right.
4879 *
4880 * Parameters:	p		Process calling fileport
4881 * 		uap->port	Name of send right to file port.
4882 *
4883 * Returns:	0		Success
4884 *		EINVAL		Invalid Mach port name, or port is not for a file.
4885 *	fdalloc:EMFILE
4886 *	fdalloc:ENOMEM		Unable to allocate fileproc or extend file table.
4887 *
4888 * Implicit returns:
4889 *		*retval (modified)		The new descriptor
4890 */
4891int
4892fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
4893{
4894	struct fileglob *fg;
4895 	struct fileproc *fp = FILEPROC_NULL;
4896	ipc_port_t port = IPC_PORT_NULL;
4897	mach_port_name_t send = uap->port;
4898	kern_return_t res;
4899	int fd;
4900	int err;
4901
4902	res = ipc_object_copyin(get_task_ipcspace(p->task),
4903			send, MACH_MSG_TYPE_COPY_SEND, &port);
4904
4905	if (res != KERN_SUCCESS) {
4906		err = EINVAL;
4907		goto out;
4908	}
4909
4910	fg = fileport_port_to_fileglob(port);
4911	if (fg == NULL) {
4912		err = EINVAL;
4913		goto out;
4914	}
4915
4916	MALLOC_ZONE(fp, struct fileproc *, sizeof(*fp), M_FILEPROC, M_WAITOK);
4917	if (fp == FILEPROC_NULL) {
4918		err = ENOMEM;
4919		goto out;
4920	}
4921
4922	bzero(fp, sizeof(*fp));
4923
4924	fp->f_fglob = fg;
4925	fg_ref(fp);
4926
4927 	proc_fdlock(p);
4928	err = fdalloc(p, 0, &fd);
4929	if (err != 0) {
4930		proc_fdunlock(p);
4931		goto out;
4932	}
4933	*fdflags(p, fd) |= UF_EXCLOSE;
4934
4935	procfdtbl_releasefd(p, fd, fp);
4936	proc_fdunlock(p);
4937
4938	*retval = fd;
4939	err = 0;
4940out:
4941	if ((fp != NULL) && (0 != err)) {
4942		FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4943	}
4944
4945	if (IPC_PORT_NULL != port) {
4946		ipc_port_release_send(port);
4947	}
4948
4949	return err;
4950}
4951
4952
4953/*
4954 * dupfdopen
4955 *
4956 * Description:	Duplicate the specified descriptor to a free descriptor;
4957 *		this is the second half of fdopen(), above.
4958 *
4959 * Parameters:	fdp				filedesc pointer to fill in
4960 *		indx				fd to dup to
4961 *		dfd				fd to dup from
4962 *		mode				mode to set on new fd
4963 *		error				command code
4964 *
4965 * Returns:	0				Success
4966 *		EBADF				Source fd is bad
4967 *		EACCES				Requested mode not allowed
4968 *		!0				'error', if not ENODEV or
4969 *						ENXIO
4970 *
4971 * Notes:	XXX This is not thread safe; see fdopen() above
4972 */
4973int
4974dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
4975{
4976	struct fileproc *wfp;
4977	struct fileproc *fp;
4978#if CONFIG_MACF
4979	int myerror;
4980#endif
4981	proc_t p = current_proc();
4982
4983	/*
4984	 * If the to-be-dup'd fd number is greater than the allowed number
4985	 * of file descriptors, or the fd to be dup'd has already been
4986	 * closed, reject.  Note, check for new == old is necessary as
4987	 * falloc could allocate an already closed to-be-dup'd descriptor
4988	 * as the new descriptor.
4989	 */
4990	proc_fdlock(p);
4991
4992	fp = fdp->fd_ofiles[indx];
4993	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
4994			(wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
4995	                (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
4996
4997	        proc_fdunlock(p);
4998		return (EBADF);
4999	}
5000#if CONFIG_MACF
5001	myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
5002	if (myerror) {
5003		proc_fdunlock(p);
5004		return (myerror);
5005	}
5006#endif
5007	/*
5008	 * There are two cases of interest here.
5009	 *
5010	 * For ENODEV simply dup (dfd) to file descriptor
5011	 * (indx) and return.
5012	 *
5013	 * For ENXIO steal away the file structure from (dfd) and
5014	 * store it in (indx).  (dfd) is effectively closed by
5015	 * this operation.
5016	 *
5017	 * Any other error code is just returned.
5018	 */
5019	switch (error) {
5020	case ENODEV:
5021		/*
5022		 * Check that the mode the file is being opened for is a
5023		 * subset of the mode of the existing descriptor.
5024		 */
5025	        if (((flags & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5026		        proc_fdunlock(p);
5027			return (EACCES);
5028		}
5029		if (indx > fdp->fd_lastfile)
5030			fdp->fd_lastfile = indx;
5031		(void)fg_ref(wfp);
5032
5033		if (fp->f_fglob)
5034		        fg_free(fp->f_fglob);
5035		fp->f_fglob = wfp->f_fglob;
5036
5037		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5038			(flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5039
5040	        proc_fdunlock(p);
5041		return (0);
5042
5043	default:
5044	        proc_fdunlock(p);
5045		return (error);
5046	}
5047	/* NOTREACHED */
5048}
5049
5050
5051/*
5052 * fg_ref
5053 *
5054 * Description:	Add a reference to a fileglob by fileproc
5055 *
5056 * Parameters:	fp				fileproc containing fileglob
5057 *						pointer
5058 *
5059 * Returns:	void
5060 *
5061 * Notes:	XXX Should use OSAddAtomic?
5062 */
5063void
5064fg_ref(struct fileproc * fp)
5065{
5066	struct fileglob *fg;
5067
5068	fg = fp->f_fglob;
5069
5070	lck_mtx_lock_spin(&fg->fg_lock);
5071
5072#if DIAGNOSTIC
5073	if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0)
5074		panic("fg_ref: invalid bits on fp %p", fp);
5075
5076	if (fg->fg_count == 0)
5077		panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
5078		    fp, fg);
5079#endif
5080	fg->fg_count++;
5081	lck_mtx_unlock(&fg->fg_lock);
5082}
5083
5084
5085/*
5086 * fg_drop
5087 *
5088 * Description:	Remove a reference to a fileglob by fileproc
5089 *
5090 * Parameters:	fp				fileproc containing fileglob
5091 *						pointer
5092 *
5093 * Returns:	void
5094 *
5095 * Notes:	XXX Should use OSAddAtomic?
5096 */
5097void
5098fg_drop(struct fileproc * fp)
5099{
5100	struct fileglob *fg;
5101
5102	fg = fp->f_fglob;
5103	lck_mtx_lock_spin(&fg->fg_lock);
5104	fg->fg_count--;
5105	lck_mtx_unlock(&fg->fg_lock);
5106}
5107
5108
5109/*
5110 * fg_insertuipc
5111 *
5112 * Description:	Insert fileglob onto message queue
5113 *
5114 * Parameters:	fg				Fileglob pointer to insert
5115 *
5116 * Returns:	void
5117 *
5118 * Locks:	Takes and drops fg_lock, potentially many times
5119 */
void
fg_insertuipc(struct fileglob * fg)
{
	/* Set when this call is the one that must link fg onto fmsghead */
	int insertque = 0;

	lck_mtx_lock_spin(&fg->fg_lock);

	/*
	 * FG_RMMSGQ is set by fg_removeuipc() while it unlinks fg from the
	 * list.  Wait for that to finish: leave FG_WRMMSGQ behind so the
	 * remover issues a wakeup on fg_lflags when it is done.
	 */
	while (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WRMMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
	}

	/* Being in a message counts as a reference on the fileglob */
	fg->fg_count++;
	fg->fg_msgcount++;
	/* First message holding fg: it has to be put on the list */
	if (fg->fg_msgcount == 1) {
		fg->fg_lflags |= FG_INSMSGQ;
		insertque=1;
	}
	lck_mtx_unlock(&fg->fg_lock);

	if (insertque) {
		/* The list itself is manipulated under uipc_lock, not fg_lock */
		lck_mtx_lock_spin(uipc_lock);
		unp_gc_wait();
		LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
		lck_mtx_unlock(uipc_lock);

		/* Insertion done: clear the in-progress flag, wake any waiter */
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_INSMSGQ;
		if (fg->fg_lflags & FG_WINSMSGQ) {
			fg->fg_lflags &= ~FG_WINSMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}

}
5157
5158
5159/*
5160 * fg_removeuipc
5161 *
5162 * Description:	Remove fileglob from message queue
5163 *
5164 * Parameters:	fg				Fileglob pointer to remove
5165 *
5166 * Returns:	void
5167 *
5168 * Locks:	Takes and drops fg_lock, potentially many times
5169 */
void
fg_removeuipc(struct fileglob * fg)
{
	/* Set when this call is the one that must unlink fg from the list */
	int removeque = 0;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * FG_INSMSGQ is set by fg_insertuipc() while it links fg onto the
	 * list.  Wait for that to finish: leave FG_WINSMSGQ behind so the
	 * inserter issues a wakeup on fg_lflags when it is done.
	 */
	while (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WINSMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
	}
	/* Only the message count is adjusted here; fg_count is not touched */
	fg->fg_msgcount--;
	/* No message holds fg any more: it has to come off the list */
	if (fg->fg_msgcount == 0) {
		fg->fg_lflags |= FG_RMMSGQ;
		removeque=1;
	}
	lck_mtx_unlock(&fg->fg_lock);

	if (removeque) {
		/* The list itself is manipulated under uipc_lock, not fg_lock */
		lck_mtx_lock_spin(uipc_lock);
		unp_gc_wait();
		LIST_REMOVE(fg, f_msglist);
		lck_mtx_unlock(uipc_lock);

		/* Removal done: clear the in-progress flag, wake any waiter */
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_RMMSGQ;
		if (fg->fg_lflags & FG_WRMMSGQ) {
			fg->fg_lflags &= ~FG_WRMMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
5203
5204
5205/*
5206 * fo_read
5207 *
5208 * Description:	Generic fileops read indirected through the fileops pointer
5209 *		in the fileproc structure
5210 *
5211 * Parameters:	fp				fileproc structure pointer
5212 *		uio				user I/O structure pointer
5213 *		flags				FOF_ flags
5214 *		ctx				VFS context for operation
5215 *
5216 * Returns:	0				Success
5217 *		!0				Errno from read
5218 */
5219int
5220fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5221{
5222	return ((*fp->f_ops->fo_read)(fp, uio, flags, ctx));
5223}
5224
5225
5226/*
5227 * fo_write
5228 *
5229 * Description:	Generic fileops write indirected through the fileops pointer
5230 *		in the fileproc structure
5231 *
5232 * Parameters:	fp				fileproc structure pointer
5233 *		uio				user I/O structure pointer
5234 *		flags				FOF_ flags
5235 *		ctx				VFS context for operation
5236 *
5237 * Returns:	0				Success
5238 *		!0				Errno from write
5239 */
5240int
5241fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5242{
5243	return((*fp->f_ops->fo_write)(fp, uio, flags, ctx));
5244}
5245
5246
5247/*
5248 * fo_ioctl
5249 *
5250 * Description:	Generic fileops ioctl indirected through the fileops pointer
5251 *		in the fileproc structure
5252 *
5253 * Parameters:	fp				fileproc structure pointer
5254 *		com				ioctl command
5255 *		data				pointer to internalized copy
5256 *						of user space ioctl command
5257 *						parameter data in kernel space
5258 *		ctx				VFS context for operation
5259 *
5260 * Returns:	0				Success
5261 *		!0				Errno from ioctl
5262 *
5263 * Locks:	The caller is assumed to have held the proc_fdlock; this
5264 *		function releases and reacquires this lock.  If the caller
5265 *		accesses data protected by this lock prior to calling this
5266 *		function, it will need to revalidate/reacquire any cached
5267 *		protected data obtained prior to the call.
5268 */
5269int
5270fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5271{
5272	int error;
5273
5274	proc_fdunlock(vfs_context_proc(ctx));
5275	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5276	proc_fdlock(vfs_context_proc(ctx));
5277	return(error);
5278}
5279
5280
5281/*
5282 * fo_select
5283 *
5284 * Description:	Generic fileops select indirected through the fileops pointer
5285 *		in the fileproc structure
5286 *
5287 * Parameters:	fp				fileproc structure pointer
5288 *		which				select which
5289 *		wql				pointer to wait queue list
5290 *		ctx				VFS context for operation
5291 *
5292 * Returns:	0				Success
5293 *		!0				Errno from select
5294 */
5295int
5296fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5297{
5298	return((*fp->f_ops->fo_select)(fp, which, wql, ctx));
5299}
5300
5301
5302/*
5303 * fo_close
5304 *
 * Description:	Generic fileops close indirected through the fileops pointer
 *		in the fileglob structure
 *
 * Parameters:	fg				fileglob structure pointer for
 *						file to close
 *		ctx				VFS context for operation
5311 *
5312 * Returns:	0				Success
5313 *		!0				Errno from close
5314 */
5315int
5316fo_close(struct fileglob *fg, vfs_context_t ctx)
5317{
5318	return((*fg->fg_ops->fo_close)(fg, ctx));
5319}
5320
5321
5322/*
5323 * fo_kqfilter
5324 *
5325 * Description:	Generic fileops kqueue filter indirected through the fileops
5326 *		pointer in the fileproc structure
5327 *
5328 * Parameters:	fp				fileproc structure pointer
5329 *		kn				pointer to knote to filter on
5330 *		ctx				VFS context for operation
5331 *
5332 * Returns:	0				Success
5333 *		!0				Errno from kqueue filter
5334 */
5335int
5336fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
5337{
5338        return ((*fp->f_ops->fo_kqfilter)(fp, kn, ctx));
5339}
5340
5341/*
5342 * The ability to send a file descriptor to another
5343 * process is opt-in by file type.
5344 */
5345boolean_t
5346filetype_issendable(file_type_t fdtype)
5347{
5348	switch (fdtype) {
5349		case DTYPE_VNODE:
5350		case DTYPE_SOCKET:
5351		case DTYPE_PIPE:
5352		case DTYPE_PSXSHM:
5353			return TRUE;
5354		default:
5355			/* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
5356			return FALSE;
5357	}
5358}
5359