kern_descrip.c revision 284665
1100616Smp/*- 259243Sobrien * Copyright (c) 1982, 1986, 1989, 1991, 1993 359243Sobrien * The Regents of the University of California. All rights reserved. 459243Sobrien * (c) UNIX System Laboratories, Inc. 559243Sobrien * All or some portions of this file are derived from material licensed 659243Sobrien * to the University of California by American Telephone and Telegraph 759243Sobrien * Co. or Unix System Laboratories, Inc. and are reproduced herein with 859243Sobrien * the permission of UNIX System Laboratories, Inc. 959243Sobrien * 1059243Sobrien * Redistribution and use in source and binary forms, with or without 1159243Sobrien * modification, are permitted provided that the following conditions 1259243Sobrien * are met: 1359243Sobrien * 1. Redistributions of source code must retain the above copyright 1459243Sobrien * notice, this list of conditions and the following disclaimer. 1559243Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1659243Sobrien * notice, this list of conditions and the following disclaimer in the 17100616Smp * documentation and/or other materials provided with the distribution. 1859243Sobrien * 4. Neither the name of the University nor the names of its contributors 1959243Sobrien * may be used to endorse or promote products derived from this software 2059243Sobrien * without specific prior written permission. 2159243Sobrien * 2259243Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2359243Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2459243Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2559243Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2659243Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2759243Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2859243Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2959243Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3059243Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3159243Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3259243Sobrien * SUCH DAMAGE. 3359243Sobrien * 3459243Sobrien * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35100616Smp */ 3659243Sobrien 3759243Sobrien#include <sys/cdefs.h> 3859243Sobrien__FBSDID("$FreeBSD: stable/10/sys/kern/kern_descrip.c 284665 2015-06-21 06:28:26Z trasz $"); 3959243Sobrien 4059243Sobrien#include "opt_capsicum.h" 4159243Sobrien#include "opt_compat.h" 4259243Sobrien#include "opt_ddb.h" 4359243Sobrien#include "opt_ktrace.h" 4459243Sobrien#include "opt_procdesc.h" 4559243Sobrien 4659243Sobrien#include <sys/param.h> 4759243Sobrien#include <sys/systm.h> 4859243Sobrien 4959243Sobrien#include <sys/capsicum.h> 5059243Sobrien#include <sys/conf.h> 5159243Sobrien#include <sys/domain.h> 5259243Sobrien#include <sys/fcntl.h> 5359243Sobrien#include <sys/file.h> 5459243Sobrien#include <sys/filedesc.h> 5559243Sobrien#include <sys/filio.h> 5659243Sobrien#include <sys/jail.h> 5759243Sobrien#include <sys/kernel.h> 5859243Sobrien#include <sys/ksem.h> 5959243Sobrien#include <sys/limits.h> 6059243Sobrien#include <sys/lock.h> 6159243Sobrien#include <sys/malloc.h> 6259243Sobrien#include <sys/mman.h> 6359243Sobrien#include <sys/mount.h> 6459243Sobrien#include <sys/mqueue.h> 6559243Sobrien#include <sys/mutex.h> 6659243Sobrien#include <sys/namei.h> 6759243Sobrien#include <sys/selinfo.h> 6859243Sobrien#include <sys/pipe.h> 6959243Sobrien#include <sys/priv.h> 7059243Sobrien#include <sys/proc.h> 7159243Sobrien#include <sys/procdesc.h> 7259243Sobrien#include <sys/protosw.h> 73100616Smp#include <sys/racct.h> 7459243Sobrien#include <sys/resourcevar.h> 7559243Sobrien#include <sys/sbuf.h> 7659243Sobrien#include <sys/signalvar.h> 7759243Sobrien#include <sys/socketvar.h> 7859243Sobrien#include <sys/stat.h> 7959243Sobrien#include <sys/sx.h> 8059243Sobrien#include <sys/syscallsubr.h> 8159243Sobrien#include <sys/sysctl.h> 8259243Sobrien#include <sys/sysproto.h> 8359243Sobrien#include <sys/tty.h> 8459243Sobrien#include <sys/unistd.h> 8559243Sobrien#include <sys/un.h> 8659243Sobrien#include <sys/unpcb.h> 8759243Sobrien#include <sys/user.h> 8859243Sobrien#include <sys/vnode.h> 8959243Sobrien#ifdef KTRACE 9059243Sobrien#include <sys/ktrace.h> 9159243Sobrien#endif 9259243Sobrien 9359243Sobrien#include <net/vnet.h> 9459243Sobrien 9559243Sobrien#include <netinet/in.h> 9659243Sobrien#include <netinet/in_pcb.h> 9759243Sobrien 9859243Sobrien#include <security/audit/audit.h> 9959243Sobrien 10059243Sobrien#include <vm/uma.h> 10159243Sobrien#include <vm/vm.h> 10259243Sobrien 10359243Sobrien#include <ddb/ddb.h> 10459243Sobrien 10559243Sobrienstatic MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 10659243Sobrienstatic MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 10759243Sobrien "file desc to leader structures"); 10859243Sobrienstatic MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 10959243SobrienMALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities"); 11059243Sobrien 11159243SobrienMALLOC_DECLARE(M_FADVISE); 11259243Sobrien 11359243Sobrienstatic uma_zone_t file_zone; 11459243Sobrien 11559243Sobrienvoid (*ksem_info)(struct ksem *ks, char *path, size_t size, uint32_t *value); 11659243Sobrien 11759243Sobrienstatic int closefp(struct filedesc *fdp, int fd, struct file *fp, 118100616Smp struct thread *td, int holdleaders); 119100616Smpstatic int fd_first_free(struct filedesc *fdp, int low, int size); 12059243Sobrienstatic int fd_last_used(struct filedesc *fdp, int size); 12159243Sobrienstatic void fdgrowtable(struct filedesc *fdp, int nfd); 12259243Sobrienstatic void fdgrowtable_exp(struct filedesc *fdp, int nfd); 12359243Sobrienstatic void fdunused(struct filedesc *fdp, int fd); 12459243Sobrienstatic void fdused(struct filedesc *fdp, int fd); 12559243Sobrienstatic int fill_pipe_info(struct pipe *pi, struct kinfo_file *kif); 12659243Sobrienstatic int fill_procdesc_info(struct procdesc *pdp, 12759243Sobrien struct kinfo_file *kif); 12859243Sobrienstatic int fill_pts_info(struct tty *tp, struct kinfo_file *kif); 12959243Sobrienstatic int fill_sem_info(struct file *fp, struct kinfo_file *kif); 13059243Sobrienstatic int fill_shm_info(struct file *fp, struct kinfo_file *kif); 13159243Sobrienstatic int fill_socket_info(struct socket *so, struct kinfo_file *kif); 13259243Sobrienstatic int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif); 13359243Sobrienstatic int getmaxfd(struct proc *p); 13459243Sobrien 13559243Sobrien/* 13659243Sobrien * Each process has: 13759243Sobrien * 13859243Sobrien * - An array of open file descriptors (fd_ofiles) 13959243Sobrien * - An array of file flags (fd_ofileflags) 14059243Sobrien * - A bitmap recording which descriptors are in use (fd_map) 14159243Sobrien * 14259243Sobrien * A process starts out with NDFILE descriptors. The value of NDFILE has 14359243Sobrien * been selected based the historical limit of 20 open files, and an 14459243Sobrien * assumption that the majority of processes, especially short-lived 14559243Sobrien * processes like shells, will never need more. 14659243Sobrien * 14759243Sobrien * If this initial allocation is exhausted, a larger descriptor table and 14859243Sobrien * map are allocated dynamically, and the pointers in the process's struct 14959243Sobrien * filedesc are updated to point to those. This is repeated every time 15059243Sobrien * the process runs out of file descriptors (provided it hasn't hit its 15159243Sobrien * resource limit). 15259243Sobrien * 15359243Sobrien * Since threads may hold references to individual descriptor table 15459243Sobrien * entries, the tables are never freed. Instead, they are placed on a 15559243Sobrien * linked list and freed only when the struct filedesc is released. 15659243Sobrien */ 15759243Sobrien#define NDFILE 20 15859243Sobrien#define NDSLOTSIZE sizeof(NDSLOTTYPE) 15959243Sobrien#define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) 16059243Sobrien#define NDSLOT(x) ((x) / NDENTRIES) 16159243Sobrien#define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) 16259243Sobrien#define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) 16359243Sobrien 16459243Sobrien/* 16559243Sobrien * SLIST entry used to keep track of ofiles which must be reclaimed when 16659243Sobrien * the process exits. 16759243Sobrien */ 16859243Sobrienstruct freetable { 16959243Sobrien struct filedescent *ft_table; 17059243Sobrien SLIST_ENTRY(freetable) ft_next; 17159243Sobrien}; 17259243Sobrien 17359243Sobrien/* 17459243Sobrien * Initial allocation: a filedesc structure + the head of SLIST used to 17559243Sobrien * keep track of old ofiles + enough space for NDFILE descriptors. 17659243Sobrien */ 17759243Sobrienstruct filedesc0 { 17859243Sobrien struct filedesc fd_fd; 17959243Sobrien SLIST_HEAD(, freetable) fd_free; 18059243Sobrien struct filedescent fd_dfiles[NDFILE]; 18159243Sobrien NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; 18259243Sobrien}; 18359243Sobrien 18459243Sobrien/* 18559243Sobrien * Descriptor management. 18659243Sobrien */ 18759243Sobrienvolatile int openfiles; /* actual number of open files */ 18859243Sobrienstruct mtx sigio_lock; /* mtx to protect pointers to sigio */ 18959243Sobrienvoid (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 19059243Sobrien 19159243Sobrien/* A mutex to protect the association between a proc and filedesc. */ 19259243Sobrienstatic struct mtx fdesc_mtx; 19359243Sobrien 19459243Sobrien/* 19559243Sobrien * If low >= size, just return low. Otherwise find the first zero bit in the 19659243Sobrien * given bitmap, starting at low and not exceeding size - 1. Return size if 19759243Sobrien * not found. 19859243Sobrien */ 19959243Sobrienstatic int 20059243Sobrienfd_first_free(struct filedesc *fdp, int low, int size) 20159243Sobrien{ 20259243Sobrien NDSLOTTYPE *map = fdp->fd_map; 20359243Sobrien NDSLOTTYPE mask; 20459243Sobrien int off, maxoff; 20559243Sobrien 20659243Sobrien if (low >= size) 20759243Sobrien return (low); 20859243Sobrien 20959243Sobrien off = NDSLOT(low); 21059243Sobrien if (low % NDENTRIES) { 21159243Sobrien mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 21259243Sobrien if ((mask &= ~map[off]) != 0UL) 21359243Sobrien return (off * NDENTRIES + ffsl(mask) - 1); 21459243Sobrien ++off; 21559243Sobrien } 21659243Sobrien for (maxoff = NDSLOTS(size); off < maxoff; ++off) 21759243Sobrien if (map[off] != ~0UL) 21859243Sobrien return (off * NDENTRIES + ffsl(~map[off]) - 1); 21959243Sobrien return (size); 22059243Sobrien} 22159243Sobrien 22259243Sobrien/* 22359243Sobrien * Find the highest non-zero bit in the given bitmap, starting at 0 and 22459243Sobrien * not exceeding size - 1. Return -1 if not found. 22559243Sobrien */ 22659243Sobrienstatic int 22759243Sobrienfd_last_used(struct filedesc *fdp, int size) 22859243Sobrien{ 22959243Sobrien NDSLOTTYPE *map = fdp->fd_map; 23059243Sobrien NDSLOTTYPE mask; 23159243Sobrien int off, minoff; 23259243Sobrien 23359243Sobrien off = NDSLOT(size); 23459243Sobrien if (size % NDENTRIES) { 23559243Sobrien mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 23659243Sobrien if ((mask &= map[off]) != 0) 23759243Sobrien return (off * NDENTRIES + flsl(mask) - 1); 23859243Sobrien --off; 23959243Sobrien } 24059243Sobrien for (minoff = NDSLOT(0); off >= minoff; --off) 24159243Sobrien if (map[off] != 0) 24259243Sobrien return (off * NDENTRIES + flsl(map[off]) - 1); 24359243Sobrien return (-1); 24459243Sobrien} 24559243Sobrien 24659243Sobrienstatic int 24759243Sobrienfdisused(struct filedesc *fdp, int fd) 24859243Sobrien{ 24959243Sobrien 25059243Sobrien FILEDESC_LOCK_ASSERT(fdp); 25159243Sobrien 25259243Sobrien KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 25359243Sobrien ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 25459243Sobrien 25559243Sobrien return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 25659243Sobrien} 25759243Sobrien 25859243Sobrien/* 25959243Sobrien * Mark a file descriptor as used. 26059243Sobrien */ 26159243Sobrienstatic void 26259243Sobrienfdused(struct filedesc *fdp, int fd) 26359243Sobrien{ 26459243Sobrien 26559243Sobrien FILEDESC_XLOCK_ASSERT(fdp); 26659243Sobrien 26759243Sobrien KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd)); 26859243Sobrien 26959243Sobrien fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 27059243Sobrien if (fd > fdp->fd_lastfile) 27159243Sobrien fdp->fd_lastfile = fd; 27259243Sobrien if (fd == fdp->fd_freefile) 27359243Sobrien fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 27459243Sobrien} 27559243Sobrien 27659243Sobrien/* 27759243Sobrien * Mark a file descriptor as unused. 27859243Sobrien */ 27959243Sobrienstatic void 28059243Sobrienfdunused(struct filedesc *fdp, int fd) 28159243Sobrien{ 28259243Sobrien 28359243Sobrien FILEDESC_XLOCK_ASSERT(fdp); 28459243Sobrien 28559243Sobrien KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); 28659243Sobrien KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, 28759243Sobrien ("fd=%d is still in use", fd)); 28859243Sobrien 28959243Sobrien fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 29059243Sobrien if (fd < fdp->fd_freefile) 29159243Sobrien fdp->fd_freefile = fd; 29259243Sobrien if (fd == fdp->fd_lastfile) 29359243Sobrien fdp->fd_lastfile = fd_last_used(fdp, fd); 29459243Sobrien} 29559243Sobrien 29659243Sobrien/* 29759243Sobrien * Free a file descriptor. 29859243Sobrien * 29959243Sobrien * Avoid some work if fdp is about to be destroyed. 30059243Sobrien */ 30159243Sobrienstatic inline void 30259243Sobrien_fdfree(struct filedesc *fdp, int fd, int last) 30359243Sobrien{ 30459243Sobrien struct filedescent *fde; 30559243Sobrien 30659243Sobrien fde = &fdp->fd_ofiles[fd]; 30759243Sobrien#ifdef CAPABILITIES 30859243Sobrien if (!last) 30959243Sobrien seq_write_begin(&fde->fde_seq); 31059243Sobrien#endif 31159243Sobrien filecaps_free(&fde->fde_caps); 31259243Sobrien if (last) 31359243Sobrien return; 31459243Sobrien bzero(fde, fde_change_size); 31559243Sobrien fdunused(fdp, fd); 31659243Sobrien#ifdef CAPABILITIES 31759243Sobrien seq_write_end(&fde->fde_seq); 31859243Sobrien#endif 31959243Sobrien} 32059243Sobrien 32159243Sobrienstatic inline void 32259243Sobrienfdfree(struct filedesc *fdp, int fd) 32359243Sobrien{ 32459243Sobrien 32559243Sobrien _fdfree(fdp, fd, 0); 32659243Sobrien} 32759243Sobrien 32859243Sobrienstatic inline void 32959243Sobrienfdfree_last(struct filedesc *fdp, int fd) 33059243Sobrien{ 33159243Sobrien 33259243Sobrien _fdfree(fdp, fd, 1); 33359243Sobrien} 33459243Sobrien 33559243Sobrien/* 33659243Sobrien * System calls on descriptors. 33759243Sobrien */ 33859243Sobrien#ifndef _SYS_SYSPROTO_H_ 33959243Sobrienstruct getdtablesize_args { 34059243Sobrien int dummy; 34159243Sobrien}; 34259243Sobrien#endif 34359243Sobrien/* ARGSUSED */ 34459243Sobrienint 34559243Sobriensys_getdtablesize(struct thread *td, struct getdtablesize_args *uap) 34659243Sobrien{ 34759243Sobrien struct proc *p = td->td_proc; 34859243Sobrien uint64_t lim; 34959243Sobrien 35059243Sobrien PROC_LOCK(p); 35159243Sobrien td->td_retval[0] = 35259243Sobrien min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 35359243Sobrien lim = racct_get_limit(td->td_proc, RACCT_NOFILE); 35459243Sobrien PROC_UNLOCK(p); 35559243Sobrien if (lim < td->td_retval[0]) 35659243Sobrien td->td_retval[0] = lim; 35759243Sobrien return (0); 35859243Sobrien} 35959243Sobrien 36059243Sobrien/* 36159243Sobrien * Duplicate a file descriptor to a particular value. 36259243Sobrien * 36359243Sobrien * Note: keep in mind that a potential race condition exists when closing 36459243Sobrien * descriptors from a shared descriptor table (via rfork). 36559243Sobrien */ 36659243Sobrien#ifndef _SYS_SYSPROTO_H_ 36759243Sobrienstruct dup2_args { 36859243Sobrien u_int from; 36959243Sobrien u_int to; 37059243Sobrien}; 37159243Sobrien#endif 37259243Sobrien/* ARGSUSED */ 37359243Sobrienint 37459243Sobriensys_dup2(struct thread *td, struct dup2_args *uap) 37559243Sobrien{ 37659243Sobrien 37759243Sobrien return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 37859243Sobrien td->td_retval)); 37959243Sobrien} 38059243Sobrien 38159243Sobrien/* 38259243Sobrien * Duplicate a file descriptor. 38359243Sobrien */ 38459243Sobrien#ifndef _SYS_SYSPROTO_H_ 38559243Sobrienstruct dup_args { 38659243Sobrien u_int fd; 38759243Sobrien}; 38859243Sobrien#endif 38959243Sobrien/* ARGSUSED */ 39059243Sobrienint 39159243Sobriensys_dup(struct thread *td, struct dup_args *uap) 39259243Sobrien{ 39359243Sobrien 39459243Sobrien return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval)); 39559243Sobrien} 39659243Sobrien 39759243Sobrien/* 39859243Sobrien * The file control system call. 39959243Sobrien */ 40059243Sobrien#ifndef _SYS_SYSPROTO_H_ 40159243Sobrienstruct fcntl_args { 40259243Sobrien int fd; 40359243Sobrien int cmd; 40459243Sobrien long arg; 40559243Sobrien}; 40659243Sobrien#endif 40759243Sobrien/* ARGSUSED */ 40859243Sobrienint 40959243Sobriensys_fcntl(struct thread *td, struct fcntl_args *uap) 41059243Sobrien{ 41159243Sobrien 41259243Sobrien return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg)); 41359243Sobrien} 41459243Sobrien 41559243Sobrienint 41659243Sobrienkern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg) 41759243Sobrien{ 41859243Sobrien struct flock fl; 41959243Sobrien struct __oflock ofl; 42059243Sobrien intptr_t arg1; 42159243Sobrien int error; 42259243Sobrien 42359243Sobrien error = 0; 42459243Sobrien switch (cmd) { 42559243Sobrien case F_OGETLK: 42659243Sobrien case F_OSETLK: 42759243Sobrien case F_OSETLKW: 42859243Sobrien /* 42959243Sobrien * Convert old flock structure to new. 43059243Sobrien */ 43159243Sobrien error = copyin((void *)(intptr_t)arg, &ofl, sizeof(ofl)); 43259243Sobrien fl.l_start = ofl.l_start; 43359243Sobrien fl.l_len = ofl.l_len; 43459243Sobrien fl.l_pid = ofl.l_pid; 43559243Sobrien fl.l_type = ofl.l_type; 43659243Sobrien fl.l_whence = ofl.l_whence; 43759243Sobrien fl.l_sysid = 0; 43859243Sobrien 43959243Sobrien switch (cmd) { 44059243Sobrien case F_OGETLK: 44159243Sobrien cmd = F_GETLK; 44259243Sobrien break; 44359243Sobrien case F_OSETLK: 44459243Sobrien cmd = F_SETLK; 44559243Sobrien break; 44659243Sobrien case F_OSETLKW: 44759243Sobrien cmd = F_SETLKW; 44859243Sobrien break; 44959243Sobrien } 45059243Sobrien arg1 = (intptr_t)&fl; 45159243Sobrien break; 45259243Sobrien case F_GETLK: 45359243Sobrien case F_SETLK: 45459243Sobrien case F_SETLKW: 45559243Sobrien case F_SETLK_REMOTE: 45659243Sobrien error = copyin((void *)(intptr_t)arg, &fl, sizeof(fl)); 45759243Sobrien arg1 = (intptr_t)&fl; 45859243Sobrien break; 45959243Sobrien default: 46059243Sobrien arg1 = arg; 46159243Sobrien break; 46259243Sobrien } 46359243Sobrien if (error) 46459243Sobrien return (error); 46559243Sobrien error = kern_fcntl(td, fd, cmd, arg1); 46659243Sobrien if (error) 46759243Sobrien return (error); 46859243Sobrien if (cmd == F_OGETLK) { 46959243Sobrien ofl.l_start = fl.l_start; 47059243Sobrien ofl.l_len = fl.l_len; 47159243Sobrien ofl.l_pid = fl.l_pid; 47259243Sobrien ofl.l_type = fl.l_type; 47359243Sobrien ofl.l_whence = fl.l_whence; 47459243Sobrien error = copyout(&ofl, (void *)(intptr_t)arg, sizeof(ofl)); 47559243Sobrien } else if (cmd == F_GETLK) { 47659243Sobrien error = copyout(&fl, (void *)(intptr_t)arg, sizeof(fl)); 47759243Sobrien } 47859243Sobrien return (error); 47959243Sobrien} 48059243Sobrien 48159243Sobrienint 48259243Sobrienkern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 48359243Sobrien{ 48459243Sobrien struct filedesc *fdp; 48559243Sobrien struct flock *flp; 48659243Sobrien struct file *fp, *fp2; 48759243Sobrien struct filedescent *fde; 48859243Sobrien struct proc *p; 48959243Sobrien struct vnode *vp; 49059243Sobrien cap_rights_t rights; 49159243Sobrien int error, flg, tmp; 49259243Sobrien uint64_t bsize; 49359243Sobrien off_t foffset; 49459243Sobrien 49559243Sobrien error = 0; 49659243Sobrien flg = F_POSIX; 49759243Sobrien p = td->td_proc; 49859243Sobrien fdp = p->p_fd; 49959243Sobrien 50059243Sobrien switch (cmd) { 50159243Sobrien case F_DUPFD: 50259243Sobrien tmp = arg; 50359243Sobrien error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval); 50459243Sobrien break; 50559243Sobrien 50659243Sobrien case F_DUPFD_CLOEXEC: 50759243Sobrien tmp = arg; 50859243Sobrien error = do_dup(td, DUP_FCNTL | DUP_CLOEXEC, fd, tmp, 50959243Sobrien td->td_retval); 51059243Sobrien break; 51159243Sobrien 51259243Sobrien case F_DUP2FD: 51359243Sobrien tmp = arg; 51459243Sobrien error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval); 51559243Sobrien break; 51659243Sobrien 51759243Sobrien case F_DUP2FD_CLOEXEC: 51859243Sobrien tmp = arg; 51959243Sobrien error = do_dup(td, DUP_FIXED | DUP_CLOEXEC, fd, tmp, 52059243Sobrien td->td_retval); 52159243Sobrien break; 52259243Sobrien 52359243Sobrien case F_GETFD: 52459243Sobrien FILEDESC_SLOCK(fdp); 52559243Sobrien if ((fp = fget_locked(fdp, fd)) == NULL) { 52659243Sobrien FILEDESC_SUNLOCK(fdp); 52759243Sobrien error = EBADF; 52859243Sobrien break; 52959243Sobrien } 53059243Sobrien fde = &fdp->fd_ofiles[fd]; 53159243Sobrien td->td_retval[0] = 53259243Sobrien (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; 53359243Sobrien FILEDESC_SUNLOCK(fdp); 53459243Sobrien break; 53559243Sobrien 53659243Sobrien case F_SETFD: 53759243Sobrien FILEDESC_XLOCK(fdp); 53859243Sobrien if ((fp = fget_locked(fdp, fd)) == NULL) { 53959243Sobrien FILEDESC_XUNLOCK(fdp); 54059243Sobrien error = EBADF; 54159243Sobrien break; 54259243Sobrien } 54359243Sobrien fde = &fdp->fd_ofiles[fd]; 54459243Sobrien fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | 54559243Sobrien (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); 54659243Sobrien FILEDESC_XUNLOCK(fdp); 54759243Sobrien break; 54859243Sobrien 54959243Sobrien case F_GETFL: 55059243Sobrien error = fget_unlocked(fdp, fd, 55159243Sobrien cap_rights_init(&rights, CAP_FCNTL), F_GETFL, &fp, NULL); 55259243Sobrien if (error != 0) 55359243Sobrien break; 55459243Sobrien td->td_retval[0] = OFLAGS(fp->f_flag); 55559243Sobrien fdrop(fp, td); 55659243Sobrien break; 55759243Sobrien 55859243Sobrien case F_SETFL: 55959243Sobrien error = fget_unlocked(fdp, fd, 56059243Sobrien cap_rights_init(&rights, CAP_FCNTL), F_SETFL, &fp, NULL); 56159243Sobrien if (error != 0) 56259243Sobrien break; 56359243Sobrien do { 56459243Sobrien tmp = flg = fp->f_flag; 56559243Sobrien tmp &= ~FCNTLFLAGS; 56659243Sobrien tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 56759243Sobrien } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); 56859243Sobrien tmp = fp->f_flag & FNONBLOCK; 56959243Sobrien error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 57059243Sobrien if (error != 0) { 57159243Sobrien fdrop(fp, td); 57259243Sobrien break; 57359243Sobrien } 57459243Sobrien tmp = fp->f_flag & FASYNC; 57559243Sobrien error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 57659243Sobrien if (error == 0) { 57759243Sobrien fdrop(fp, td); 57859243Sobrien break; 57959243Sobrien } 58059243Sobrien atomic_clear_int(&fp->f_flag, FNONBLOCK); 58159243Sobrien tmp = 0; 58259243Sobrien (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 58359243Sobrien fdrop(fp, td); 58459243Sobrien break; 58559243Sobrien 58659243Sobrien case F_GETOWN: 58759243Sobrien error = fget_unlocked(fdp, fd, 58859243Sobrien cap_rights_init(&rights, CAP_FCNTL), F_GETOWN, &fp, NULL); 58959243Sobrien if (error != 0) 59059243Sobrien break; 59159243Sobrien error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 59259243Sobrien if (error == 0) 59359243Sobrien td->td_retval[0] = tmp; 59459243Sobrien fdrop(fp, td); 59559243Sobrien break; 59659243Sobrien 59759243Sobrien case F_SETOWN: 59859243Sobrien error = fget_unlocked(fdp, fd, 59959243Sobrien cap_rights_init(&rights, CAP_FCNTL), F_SETOWN, &fp, NULL); 60059243Sobrien if (error != 0) 60159243Sobrien break; 60259243Sobrien tmp = arg; 60359243Sobrien error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 60459243Sobrien fdrop(fp, td); 60559243Sobrien break; 60659243Sobrien 60759243Sobrien case F_SETLK_REMOTE: 60859243Sobrien error = priv_check(td, PRIV_NFS_LOCKD); 60959243Sobrien if (error) 61059243Sobrien return (error); 61159243Sobrien flg = F_REMOTE; 61259243Sobrien goto do_setlk; 61359243Sobrien 61459243Sobrien case F_SETLKW: 61559243Sobrien flg |= F_WAIT; 61659243Sobrien /* FALLTHROUGH F_SETLK */ 61759243Sobrien 61859243Sobrien case F_SETLK: 61959243Sobrien do_setlk: 62059243Sobrien cap_rights_init(&rights, CAP_FLOCK); 62159243Sobrien error = fget_unlocked(fdp, fd, &rights, 0, &fp, NULL); 62259243Sobrien if (error != 0) 62359243Sobrien break; 624 if (fp->f_type != DTYPE_VNODE) { 625 error = EBADF; 626 fdrop(fp, td); 627 break; 628 } 629 630 flp = (struct flock *)arg; 631 if (flp->l_whence == SEEK_CUR) { 632 foffset = foffset_get(fp); 633 if (foffset < 0 || 634 (flp->l_start > 0 && 635 foffset > OFF_MAX - flp->l_start)) { 636 FILEDESC_SUNLOCK(fdp); 637 error = EOVERFLOW; 638 fdrop(fp, td); 639 break; 640 } 641 flp->l_start += foffset; 642 } 643 644 vp = fp->f_vnode; 645 switch (flp->l_type) { 646 case F_RDLCK: 647 if ((fp->f_flag & FREAD) == 0) { 648 error = EBADF; 649 break; 650 } 651 PROC_LOCK(p->p_leader); 652 p->p_leader->p_flag |= P_ADVLOCK; 653 PROC_UNLOCK(p->p_leader); 654 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 655 flp, flg); 656 break; 657 case F_WRLCK: 658 if ((fp->f_flag & FWRITE) == 0) { 659 error = EBADF; 660 break; 661 } 662 PROC_LOCK(p->p_leader); 663 p->p_leader->p_flag |= P_ADVLOCK; 664 PROC_UNLOCK(p->p_leader); 665 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 666 flp, flg); 667 break; 668 case F_UNLCK: 669 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 670 flp, flg); 671 break; 672 case F_UNLCKSYS: 673 /* 674 * Temporary api for testing remote lock 675 * infrastructure. 676 */ 677 if (flg != F_REMOTE) { 678 error = EINVAL; 679 break; 680 } 681 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 682 F_UNLCKSYS, flp, flg); 683 break; 684 default: 685 error = EINVAL; 686 break; 687 } 688 if (error != 0 || flp->l_type == F_UNLCK || 689 flp->l_type == F_UNLCKSYS) { 690 fdrop(fp, td); 691 break; 692 } 693 694 /* 695 * Check for a race with close. 696 * 697 * The vnode is now advisory locked (or unlocked, but this case 698 * is not really important) as the caller requested. 699 * We had to drop the filedesc lock, so we need to recheck if 700 * the descriptor is still valid, because if it was closed 701 * in the meantime we need to remove advisory lock from the 702 * vnode - close on any descriptor leading to an advisory 703 * locked vnode, removes that lock. 704 * We will return 0 on purpose in that case, as the result of 705 * successful advisory lock might have been externally visible 706 * already. This is fine - effectively we pretend to the caller 707 * that the closing thread was a bit slower and that the 708 * advisory lock succeeded before the close. 709 */ 710 error = fget_unlocked(fdp, fd, &rights, 0, &fp2, NULL); 711 if (error != 0) { 712 fdrop(fp, td); 713 break; 714 } 715 if (fp != fp2) { 716 flp->l_whence = SEEK_SET; 717 flp->l_start = 0; 718 flp->l_len = 0; 719 flp->l_type = F_UNLCK; 720 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 721 F_UNLCK, flp, F_POSIX); 722 } 723 fdrop(fp, td); 724 fdrop(fp2, td); 725 break; 726 727 case F_GETLK: 728 error = fget_unlocked(fdp, fd, 729 cap_rights_init(&rights, CAP_FLOCK), 0, &fp, NULL); 730 if (error != 0) 731 break; 732 if (fp->f_type != DTYPE_VNODE) { 733 error = EBADF; 734 fdrop(fp, td); 735 break; 736 } 737 flp = (struct flock *)arg; 738 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 739 flp->l_type != F_UNLCK) { 740 error = EINVAL; 741 fdrop(fp, td); 742 break; 743 } 744 if (flp->l_whence == SEEK_CUR) { 745 foffset = foffset_get(fp); 746 if ((flp->l_start > 0 && 747 foffset > OFF_MAX - flp->l_start) || 748 (flp->l_start < 0 && 749 foffset < OFF_MIN - flp->l_start)) { 750 FILEDESC_SUNLOCK(fdp); 751 error = EOVERFLOW; 752 fdrop(fp, td); 753 break; 754 } 755 flp->l_start += foffset; 756 } 757 vp = fp->f_vnode; 758 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 759 F_POSIX); 760 fdrop(fp, td); 761 break; 762 763 case F_RDAHEAD: 764 arg = arg ? 128 * 1024: 0; 765 /* FALLTHROUGH */ 766 case F_READAHEAD: 767 error = fget_unlocked(fdp, fd, NULL, 0, &fp, NULL); 768 if (error != 0) 769 break; 770 if (fp->f_type != DTYPE_VNODE) { 771 fdrop(fp, td); 772 error = EBADF; 773 break; 774 } 775 vp = fp->f_vnode; 776 /* 777 * Exclusive lock synchronizes against f_seqcount reads and 778 * writes in sequential_heuristic(). 779 */ 780 error = vn_lock(vp, LK_EXCLUSIVE); 781 if (error != 0) { 782 fdrop(fp, td); 783 break; 784 } 785 if (arg >= 0) { 786 bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize; 787 fp->f_seqcount = (arg + bsize - 1) / bsize; 788 atomic_set_int(&fp->f_flag, FRDAHEAD); 789 } else { 790 atomic_clear_int(&fp->f_flag, FRDAHEAD); 791 } 792 VOP_UNLOCK(vp, 0); 793 fdrop(fp, td); 794 break; 795 796 default: 797 error = EINVAL; 798 break; 799 } 800 return (error); 801} 802 803static int 804getmaxfd(struct proc *p) 805{ 806 int maxfd; 807 808 PROC_LOCK(p); 809 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 810 PROC_UNLOCK(p); 811 812 return (maxfd); 813} 814 815/* 816 * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). 817 */ 818int 819do_dup(struct thread *td, int flags, int old, int new, 820 register_t *retval) 821{ 822 struct filedesc *fdp; 823 struct filedescent *oldfde, *newfde; 824 struct proc *p; 825 struct file *fp; 826 struct file *delfp; 827 int error, maxfd; 828 829 p = td->td_proc; 830 fdp = p->p_fd; 831 832 /* 833 * Verify we have a valid descriptor to dup from and possibly to 834 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should 835 * return EINVAL when the new descriptor is out of bounds. 836 */ 837 if (old < 0) 838 return (EBADF); 839 if (new < 0) 840 return (flags & DUP_FCNTL ? EINVAL : EBADF); 841 maxfd = getmaxfd(p); 842 if (new >= maxfd) 843 return (flags & DUP_FCNTL ? EINVAL : EBADF); 844 845 FILEDESC_XLOCK(fdp); 846 if (fget_locked(fdp, old) == NULL) { 847 FILEDESC_XUNLOCK(fdp); 848 return (EBADF); 849 } 850 oldfde = &fdp->fd_ofiles[old]; 851 if (flags & DUP_FIXED && old == new) { 852 *retval = new; 853 if (flags & DUP_CLOEXEC) 854 fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; 855 FILEDESC_XUNLOCK(fdp); 856 return (0); 857 } 858 fp = oldfde->fde_file; 859 fhold(fp); 860 861 /* 862 * If the caller specified a file descriptor, make sure the file 863 * table is large enough to hold it, and grab it. Otherwise, just 864 * allocate a new descriptor the usual way. 865 */ 866 if (flags & DUP_FIXED) { 867 if (new >= fdp->fd_nfiles) { 868 /* 869 * The resource limits are here instead of e.g. 870 * fdalloc(), because the file descriptor table may be 871 * shared between processes, so we can't really use 872 * racct_add()/racct_sub(). Instead of counting the 873 * number of actually allocated descriptors, just put 874 * the limit on the size of the file descriptor table. 875 */ 876#ifdef RACCT 877 if (racct_enable) { 878 PROC_LOCK(p); 879 error = racct_set(p, RACCT_NOFILE, new + 1); 880 PROC_UNLOCK(p); 881 if (error != 0) { 882 FILEDESC_XUNLOCK(fdp); 883 fdrop(fp, td); 884 return (EMFILE); 885 } 886 } 887#endif 888 fdgrowtable_exp(fdp, new + 1); 889 oldfde = &fdp->fd_ofiles[old]; 890 } 891 newfde = &fdp->fd_ofiles[new]; 892 if (newfde->fde_file == NULL) 893 fdused(fdp, new); 894 } else { 895 if ((error = fdalloc(td, new, &new)) != 0) { 896 FILEDESC_XUNLOCK(fdp); 897 fdrop(fp, td); 898 return (error); 899 } 900 newfde = &fdp->fd_ofiles[new]; 901 } 902 903 KASSERT(fp == oldfde->fde_file, ("old fd has been modified")); 904 KASSERT(old != new, ("new fd is same as old")); 905 906 delfp = newfde->fde_file; 907 908 /* 909 * Duplicate the source descriptor. 910 */ 911#ifdef CAPABILITIES 912 seq_write_begin(&newfde->fde_seq); 913#endif 914 filecaps_free(&newfde->fde_caps); 915 memcpy(newfde, oldfde, fde_change_size); 916 filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); 917 if ((flags & DUP_CLOEXEC) != 0) 918 newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; 919 else 920 newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; 921#ifdef CAPABILITIES 922 seq_write_end(&newfde->fde_seq); 923#endif 924 *retval = new; 925 926 if (delfp != NULL) { 927 (void) closefp(fdp, new, delfp, td, 1); 928 /* closefp() drops the FILEDESC lock for us. */ 929 } else { 930 FILEDESC_XUNLOCK(fdp); 931 } 932 933 return (0); 934} 935 936/* 937 * If sigio is on the list associated with a process or process group, 938 * disable signalling from the device, remove sigio from the list and 939 * free sigio. 940 */ 941void 942funsetown(struct sigio **sigiop) 943{ 944 struct sigio *sigio; 945 946 SIGIO_LOCK(); 947 sigio = *sigiop; 948 if (sigio == NULL) { 949 SIGIO_UNLOCK(); 950 return; 951 } 952 *(sigio->sio_myref) = NULL; 953 if ((sigio)->sio_pgid < 0) { 954 struct pgrp *pg = (sigio)->sio_pgrp; 955 PGRP_LOCK(pg); 956 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 957 sigio, sio_pgsigio); 958 PGRP_UNLOCK(pg); 959 } else { 960 struct proc *p = (sigio)->sio_proc; 961 PROC_LOCK(p); 962 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 963 sigio, sio_pgsigio); 964 PROC_UNLOCK(p); 965 } 966 SIGIO_UNLOCK(); 967 crfree(sigio->sio_ucred); 968 free(sigio, M_SIGIO); 969} 970 971/* 972 * Free a list of sigio structures. 973 * We only need to lock the SIGIO_LOCK because we have made ourselves 974 * inaccessible to callers of fsetown and therefore do not need to lock 975 * the proc or pgrp struct for the list manipulation. 976 */ 977void 978funsetownlst(struct sigiolst *sigiolst) 979{ 980 struct proc *p; 981 struct pgrp *pg; 982 struct sigio *sigio; 983 984 sigio = SLIST_FIRST(sigiolst); 985 if (sigio == NULL) 986 return; 987 p = NULL; 988 pg = NULL; 989 990 /* 991 * Every entry of the list should belong 992 * to a single proc or pgrp. 993 */ 994 if (sigio->sio_pgid < 0) { 995 pg = sigio->sio_pgrp; 996 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 997 } else /* if (sigio->sio_pgid > 0) */ { 998 p = sigio->sio_proc; 999 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1000 } 1001 1002 SIGIO_LOCK(); 1003 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 1004 *(sigio->sio_myref) = NULL; 1005 if (pg != NULL) { 1006 KASSERT(sigio->sio_pgid < 0, 1007 ("Proc sigio in pgrp sigio list")); 1008 KASSERT(sigio->sio_pgrp == pg, 1009 ("Bogus pgrp in sigio list")); 1010 PGRP_LOCK(pg); 1011 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 1012 sio_pgsigio); 1013 PGRP_UNLOCK(pg); 1014 } else /* if (p != NULL) */ { 1015 KASSERT(sigio->sio_pgid > 0, 1016 ("Pgrp sigio in proc sigio list")); 1017 KASSERT(sigio->sio_proc == p, 1018 ("Bogus proc in sigio list")); 1019 PROC_LOCK(p); 1020 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 1021 sio_pgsigio); 1022 PROC_UNLOCK(p); 1023 } 1024 SIGIO_UNLOCK(); 1025 crfree(sigio->sio_ucred); 1026 free(sigio, M_SIGIO); 1027 SIGIO_LOCK(); 1028 } 1029 SIGIO_UNLOCK(); 1030} 1031 1032/* 1033 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 1034 * 1035 * After permission checking, add a sigio structure to the sigio list for 1036 * the process or process group. 1037 */ 1038int 1039fsetown(pid_t pgid, struct sigio **sigiop) 1040{ 1041 struct proc *proc; 1042 struct pgrp *pgrp; 1043 struct sigio *sigio; 1044 int ret; 1045 1046 if (pgid == 0) { 1047 funsetown(sigiop); 1048 return (0); 1049 } 1050 1051 ret = 0; 1052 1053 /* Allocate and fill in the new sigio out of locks. */ 1054 sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK); 1055 sigio->sio_pgid = pgid; 1056 sigio->sio_ucred = crhold(curthread->td_ucred); 1057 sigio->sio_myref = sigiop; 1058 1059 sx_slock(&proctree_lock); 1060 if (pgid > 0) { 1061 proc = pfind(pgid); 1062 if (proc == NULL) { 1063 ret = ESRCH; 1064 goto fail; 1065 } 1066 1067 /* 1068 * Policy - Don't allow a process to FSETOWN a process 1069 * in another session. 1070 * 1071 * Remove this test to allow maximum flexibility or 1072 * restrict FSETOWN to the current process or process 1073 * group for maximum safety. 1074 */ 1075 PROC_UNLOCK(proc); 1076 if (proc->p_session != curthread->td_proc->p_session) { 1077 ret = EPERM; 1078 goto fail; 1079 } 1080 1081 pgrp = NULL; 1082 } else /* if (pgid < 0) */ { 1083 pgrp = pgfind(-pgid); 1084 if (pgrp == NULL) { 1085 ret = ESRCH; 1086 goto fail; 1087 } 1088 PGRP_UNLOCK(pgrp); 1089 1090 /* 1091 * Policy - Don't allow a process to FSETOWN a process 1092 * in another session. 1093 * 1094 * Remove this test to allow maximum flexibility or 1095 * restrict FSETOWN to the current process or process 1096 * group for maximum safety. 1097 */ 1098 if (pgrp->pg_session != curthread->td_proc->p_session) { 1099 ret = EPERM; 1100 goto fail; 1101 } 1102 1103 proc = NULL; 1104 } 1105 funsetown(sigiop); 1106 if (pgid > 0) { 1107 PROC_LOCK(proc); 1108 /* 1109 * Since funsetownlst() is called without the proctree 1110 * locked, we need to check for P_WEXIT. 1111 * XXX: is ESRCH correct? 1112 */ 1113 if ((proc->p_flag & P_WEXIT) != 0) { 1114 PROC_UNLOCK(proc); 1115 ret = ESRCH; 1116 goto fail; 1117 } 1118 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 1119 sigio->sio_proc = proc; 1120 PROC_UNLOCK(proc); 1121 } else { 1122 PGRP_LOCK(pgrp); 1123 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 1124 sigio->sio_pgrp = pgrp; 1125 PGRP_UNLOCK(pgrp); 1126 } 1127 sx_sunlock(&proctree_lock); 1128 SIGIO_LOCK(); 1129 *sigiop = sigio; 1130 SIGIO_UNLOCK(); 1131 return (0); 1132 1133fail: 1134 sx_sunlock(&proctree_lock); 1135 crfree(sigio->sio_ucred); 1136 free(sigio, M_SIGIO); 1137 return (ret); 1138} 1139 1140/* 1141 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 1142 */ 1143pid_t 1144fgetown(sigiop) 1145 struct sigio **sigiop; 1146{ 1147 pid_t pgid; 1148 1149 SIGIO_LOCK(); 1150 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 1151 SIGIO_UNLOCK(); 1152 return (pgid); 1153} 1154 1155/* 1156 * Function drops the filedesc lock on return. 1157 */ 1158static int 1159closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, 1160 int holdleaders) 1161{ 1162 int error; 1163 1164 FILEDESC_XLOCK_ASSERT(fdp); 1165 1166 if (holdleaders) { 1167 if (td->td_proc->p_fdtol != NULL) { 1168 /* 1169 * Ask fdfree() to sleep to ensure that all relevant 1170 * process leaders can be traversed in closef(). 1171 */ 1172 fdp->fd_holdleaderscount++; 1173 } else { 1174 holdleaders = 0; 1175 } 1176 } 1177 1178 /* 1179 * We now hold the fp reference that used to be owned by the 1180 * descriptor array. We have to unlock the FILEDESC *AFTER* 1181 * knote_fdclose to prevent a race of the fd getting opened, a knote 1182 * added, and deleteing a knote for the new fd. 1183 */ 1184 knote_fdclose(td, fd); 1185 1186 /* 1187 * We need to notify mqueue if the object is of type mqueue. 1188 */ 1189 if (fp->f_type == DTYPE_MQUEUE) 1190 mq_fdclose(td, fd, fp); 1191 FILEDESC_XUNLOCK(fdp); 1192 1193 error = closef(fp, td); 1194 if (holdleaders) { 1195 FILEDESC_XLOCK(fdp); 1196 fdp->fd_holdleaderscount--; 1197 if (fdp->fd_holdleaderscount == 0 && 1198 fdp->fd_holdleaderswakeup != 0) { 1199 fdp->fd_holdleaderswakeup = 0; 1200 wakeup(&fdp->fd_holdleaderscount); 1201 } 1202 FILEDESC_XUNLOCK(fdp); 1203 } 1204 return (error); 1205} 1206 1207/* 1208 * Close a file descriptor. 1209 */ 1210#ifndef _SYS_SYSPROTO_H_ 1211struct close_args { 1212 int fd; 1213}; 1214#endif 1215/* ARGSUSED */ 1216int 1217sys_close(td, uap) 1218 struct thread *td; 1219 struct close_args *uap; 1220{ 1221 1222 return (kern_close(td, uap->fd)); 1223} 1224 1225int 1226kern_close(td, fd) 1227 struct thread *td; 1228 int fd; 1229{ 1230 struct filedesc *fdp; 1231 struct file *fp; 1232 1233 fdp = td->td_proc->p_fd; 1234 1235 AUDIT_SYSCLOSE(td, fd); 1236 1237 FILEDESC_XLOCK(fdp); 1238 if ((fp = fget_locked(fdp, fd)) == NULL) { 1239 FILEDESC_XUNLOCK(fdp); 1240 return (EBADF); 1241 } 1242 fdfree(fdp, fd); 1243 1244 /* closefp() drops the FILEDESC lock for us. */ 1245 return (closefp(fdp, fd, fp, td, 1)); 1246} 1247 1248/* 1249 * Close open file descriptors. 1250 */ 1251#ifndef _SYS_SYSPROTO_H_ 1252struct closefrom_args { 1253 int lowfd; 1254}; 1255#endif 1256/* ARGSUSED */ 1257int 1258sys_closefrom(struct thread *td, struct closefrom_args *uap) 1259{ 1260 struct filedesc *fdp; 1261 int fd; 1262 1263 fdp = td->td_proc->p_fd; 1264 AUDIT_ARG_FD(uap->lowfd); 1265 1266 /* 1267 * Treat negative starting file descriptor values identical to 1268 * closefrom(0) which closes all files. 1269 */ 1270 if (uap->lowfd < 0) 1271 uap->lowfd = 0; 1272 FILEDESC_SLOCK(fdp); 1273 for (fd = uap->lowfd; fd <= fdp->fd_lastfile; fd++) { 1274 if (fdp->fd_ofiles[fd].fde_file != NULL) { 1275 FILEDESC_SUNLOCK(fdp); 1276 (void)kern_close(td, fd); 1277 FILEDESC_SLOCK(fdp); 1278 } 1279 } 1280 FILEDESC_SUNLOCK(fdp); 1281 return (0); 1282} 1283 1284#if defined(COMPAT_43) 1285/* 1286 * Return status information about a file descriptor. 1287 */ 1288#ifndef _SYS_SYSPROTO_H_ 1289struct ofstat_args { 1290 int fd; 1291 struct ostat *sb; 1292}; 1293#endif 1294/* ARGSUSED */ 1295int 1296ofstat(struct thread *td, struct ofstat_args *uap) 1297{ 1298 struct ostat oub; 1299 struct stat ub; 1300 int error; 1301 1302 error = kern_fstat(td, uap->fd, &ub); 1303 if (error == 0) { 1304 cvtstat(&ub, &oub); 1305 error = copyout(&oub, uap->sb, sizeof(oub)); 1306 } 1307 return (error); 1308} 1309#endif /* COMPAT_43 */ 1310 1311/* 1312 * Return status information about a file descriptor. 1313 */ 1314#ifndef _SYS_SYSPROTO_H_ 1315struct fstat_args { 1316 int fd; 1317 struct stat *sb; 1318}; 1319#endif 1320/* ARGSUSED */ 1321int 1322sys_fstat(struct thread *td, struct fstat_args *uap) 1323{ 1324 struct stat ub; 1325 int error; 1326 1327 error = kern_fstat(td, uap->fd, &ub); 1328 if (error == 0) 1329 error = copyout(&ub, uap->sb, sizeof(ub)); 1330 return (error); 1331} 1332 1333int 1334kern_fstat(struct thread *td, int fd, struct stat *sbp) 1335{ 1336 struct file *fp; 1337 cap_rights_t rights; 1338 int error; 1339 1340 AUDIT_ARG_FD(fd); 1341 1342 error = fget(td, fd, cap_rights_init(&rights, CAP_FSTAT), &fp); 1343 if (error != 0) 1344 return (error); 1345 1346 AUDIT_ARG_FILE(td->td_proc, fp); 1347 1348 error = fo_stat(fp, sbp, td->td_ucred, td); 1349 fdrop(fp, td); 1350#ifdef KTRACE 1351 if (error == 0 && KTRPOINT(td, KTR_STRUCT)) 1352 ktrstat(sbp); 1353#endif 1354 return (error); 1355} 1356 1357/* 1358 * Return status information about a file descriptor. 1359 */ 1360#ifndef _SYS_SYSPROTO_H_ 1361struct nfstat_args { 1362 int fd; 1363 struct nstat *sb; 1364}; 1365#endif 1366/* ARGSUSED */ 1367int 1368sys_nfstat(struct thread *td, struct nfstat_args *uap) 1369{ 1370 struct nstat nub; 1371 struct stat ub; 1372 int error; 1373 1374 error = kern_fstat(td, uap->fd, &ub); 1375 if (error == 0) { 1376 cvtnstat(&ub, &nub); 1377 error = copyout(&nub, uap->sb, sizeof(nub)); 1378 } 1379 return (error); 1380} 1381 1382/* 1383 * Return pathconf information about a file descriptor. 1384 */ 1385#ifndef _SYS_SYSPROTO_H_ 1386struct fpathconf_args { 1387 int fd; 1388 int name; 1389}; 1390#endif 1391/* ARGSUSED */ 1392int 1393sys_fpathconf(struct thread *td, struct fpathconf_args *uap) 1394{ 1395 struct file *fp; 1396 struct vnode *vp; 1397 cap_rights_t rights; 1398 int error; 1399 1400 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FPATHCONF), &fp); 1401 if (error != 0) 1402 return (error); 1403 1404 /* If asynchronous I/O is available, it works for all descriptors. */ 1405 if (uap->name == _PC_ASYNC_IO) { 1406 td->td_retval[0] = async_io_version; 1407 goto out; 1408 } 1409 vp = fp->f_vnode; 1410 if (vp != NULL) { 1411 vn_lock(vp, LK_SHARED | LK_RETRY); 1412 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1413 VOP_UNLOCK(vp, 0); 1414 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1415 if (uap->name != _PC_PIPE_BUF) { 1416 error = EINVAL; 1417 } else { 1418 td->td_retval[0] = PIPE_BUF; 1419 error = 0; 1420 } 1421 } else { 1422 error = EOPNOTSUPP; 1423 } 1424out: 1425 fdrop(fp, td); 1426 return (error); 1427} 1428 1429/* 1430 * Initialize filecaps structure. 1431 */ 1432void 1433filecaps_init(struct filecaps *fcaps) 1434{ 1435 1436 bzero(fcaps, sizeof(*fcaps)); 1437 fcaps->fc_nioctls = -1; 1438} 1439 1440/* 1441 * Copy filecaps structure allocating memory for ioctls array if needed. 1442 */ 1443void 1444filecaps_copy(const struct filecaps *src, struct filecaps *dst) 1445{ 1446 size_t size; 1447 1448 *dst = *src; 1449 if (src->fc_ioctls != NULL) { 1450 KASSERT(src->fc_nioctls > 0, 1451 ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); 1452 1453 size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; 1454 dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK); 1455 bcopy(src->fc_ioctls, dst->fc_ioctls, size); 1456 } 1457} 1458 1459/* 1460 * Move filecaps structure to the new place and clear the old place. 1461 */ 1462void 1463filecaps_move(struct filecaps *src, struct filecaps *dst) 1464{ 1465 1466 *dst = *src; 1467 bzero(src, sizeof(*src)); 1468} 1469 1470/* 1471 * Fill the given filecaps structure with full rights. 1472 */ 1473static void 1474filecaps_fill(struct filecaps *fcaps) 1475{ 1476 1477 CAP_ALL(&fcaps->fc_rights); 1478 fcaps->fc_ioctls = NULL; 1479 fcaps->fc_nioctls = -1; 1480 fcaps->fc_fcntls = CAP_FCNTL_ALL; 1481} 1482 1483/* 1484 * Free memory allocated within filecaps structure. 1485 */ 1486void 1487filecaps_free(struct filecaps *fcaps) 1488{ 1489 1490 free(fcaps->fc_ioctls, M_FILECAPS); 1491 bzero(fcaps, sizeof(*fcaps)); 1492} 1493 1494/* 1495 * Validate the given filecaps structure. 1496 */ 1497static void 1498filecaps_validate(const struct filecaps *fcaps, const char *func) 1499{ 1500 1501 KASSERT(cap_rights_is_valid(&fcaps->fc_rights), 1502 ("%s: invalid rights", func)); 1503 KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, 1504 ("%s: invalid fcntls", func)); 1505 KASSERT(fcaps->fc_fcntls == 0 || 1506 cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL), 1507 ("%s: fcntls without CAP_FCNTL", func)); 1508 KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 : 1509 (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), 1510 ("%s: invalid ioctls", func)); 1511 KASSERT(fcaps->fc_nioctls == 0 || 1512 cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL), 1513 ("%s: ioctls without CAP_IOCTL", func)); 1514} 1515 1516static void 1517fdgrowtable_exp(struct filedesc *fdp, int nfd) 1518{ 1519 int nfd1; 1520 1521 FILEDESC_XLOCK_ASSERT(fdp); 1522 1523 nfd1 = fdp->fd_nfiles * 2; 1524 if (nfd1 < nfd) 1525 nfd1 = nfd; 1526 fdgrowtable(fdp, nfd1); 1527} 1528 1529/* 1530 * Grow the file table to accomodate (at least) nfd descriptors. 1531 */ 1532static void 1533fdgrowtable(struct filedesc *fdp, int nfd) 1534{ 1535 struct filedesc0 *fdp0; 1536 struct freetable *ft; 1537 struct filedescent *ntable; 1538 struct filedescent *otable; 1539 int nnfiles, onfiles; 1540 NDSLOTTYPE *nmap, *omap; 1541 1542 FILEDESC_XLOCK_ASSERT(fdp); 1543 1544 KASSERT(fdp->fd_nfiles > 0, ("zero-length file table")); 1545 1546 /* save old values */ 1547 onfiles = fdp->fd_nfiles; 1548 otable = fdp->fd_ofiles; 1549 omap = fdp->fd_map; 1550 1551 /* compute the size of the new table */ 1552 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1553 if (nnfiles <= onfiles) 1554 /* the table is already large enough */ 1555 return; 1556 1557 /* 1558 * Allocate a new table. We need enough space for the 1559 * file entries themselves and the struct freetable we will use 1560 * when we decommission the table and place it on the freelist. 1561 * We place the struct freetable in the middle so we don't have 1562 * to worry about padding. 1563 */ 1564 ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), 1565 M_FILEDESC, M_ZERO | M_WAITOK); 1566 /* copy the old data over and point at the new tables */ 1567 memcpy(ntable, otable, onfiles * sizeof(*otable)); 1568 fdp->fd_ofiles = ntable; 1569 1570 /* 1571 * Allocate a new map only if the old is not large enough. It will 1572 * grow at a slower rate than the table as it can map more 1573 * entries than the table can hold. 1574 */ 1575 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { 1576 nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, 1577 M_ZERO | M_WAITOK); 1578 /* copy over the old data and update the pointer */ 1579 memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); 1580 fdp->fd_map = nmap; 1581 } 1582 1583 /* 1584 * In order to have a valid pattern for fget_unlocked() 1585 * fdp->fd_nfiles must be the last member to be updated, otherwise 1586 * fget_unlocked() consumers may reference a new, higher value for 1587 * fdp->fd_nfiles before to access the fdp->fd_ofiles array, 1588 * resulting in OOB accesses. 1589 */ 1590 atomic_store_rel_int(&fdp->fd_nfiles, nnfiles); 1591 1592 /* 1593 * Do not free the old file table, as some threads may still 1594 * reference entries within it. Instead, place it on a freelist 1595 * which will be processed when the struct filedesc is released. 1596 * 1597 * Note that if onfiles == NDFILE, we're dealing with the original 1598 * static allocation contained within (struct filedesc0 *)fdp, 1599 * which must not be freed. 1600 */ 1601 if (onfiles > NDFILE) { 1602 ft = (struct freetable *)&otable[onfiles]; 1603 fdp0 = (struct filedesc0 *)fdp; 1604 ft->ft_table = otable; 1605 SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); 1606 } 1607 /* 1608 * The map does not have the same possibility of threads still 1609 * holding references to it. So always free it as long as it 1610 * does not reference the original static allocation. 1611 */ 1612 if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) 1613 free(omap, M_FILEDESC); 1614} 1615 1616/* 1617 * Allocate a file descriptor for the process. 1618 */ 1619int 1620fdalloc(struct thread *td, int minfd, int *result) 1621{ 1622 struct proc *p = td->td_proc; 1623 struct filedesc *fdp = p->p_fd; 1624 int fd = -1, maxfd, allocfd; 1625#ifdef RACCT 1626 int error; 1627#endif 1628 1629 FILEDESC_XLOCK_ASSERT(fdp); 1630 1631 if (fdp->fd_freefile > minfd) 1632 minfd = fdp->fd_freefile; 1633 1634 maxfd = getmaxfd(p); 1635 1636 /* 1637 * Search the bitmap for a free descriptor starting at minfd. 1638 * If none is found, grow the file table. 1639 */ 1640 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1641 if (fd >= maxfd) 1642 return (EMFILE); 1643 if (fd >= fdp->fd_nfiles) { 1644 allocfd = min(fd * 2, maxfd); 1645#ifdef RACCT 1646 if (racct_enable) { 1647 PROC_LOCK(p); 1648 error = racct_set(p, RACCT_NOFILE, allocfd); 1649 PROC_UNLOCK(p); 1650 if (error != 0) 1651 return (EMFILE); 1652 } 1653#endif 1654 /* 1655 * fd is already equal to first free descriptor >= minfd, so 1656 * we only need to grow the table and we are done. 1657 */ 1658 fdgrowtable_exp(fdp, allocfd); 1659 } 1660 1661 /* 1662 * Perform some sanity checks, then mark the file descriptor as 1663 * used and return it to the caller. 1664 */ 1665 KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles), 1666 ("invalid descriptor %d", fd)); 1667 KASSERT(!fdisused(fdp, fd), 1668 ("fd_first_free() returned non-free descriptor")); 1669 KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, 1670 ("file descriptor isn't free")); 1671 KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, ("file flags are set")); 1672 fdused(fdp, fd); 1673 *result = fd; 1674 return (0); 1675} 1676 1677/* 1678 * Allocate n file descriptors for the process. 1679 */ 1680int 1681fdallocn(struct thread *td, int minfd, int *fds, int n) 1682{ 1683 struct proc *p = td->td_proc; 1684 struct filedesc *fdp = p->p_fd; 1685 int i; 1686 1687 FILEDESC_XLOCK_ASSERT(fdp); 1688 1689 if (!fdavail(td, n)) 1690 return (EMFILE); 1691 1692 for (i = 0; i < n; i++) 1693 if (fdalloc(td, 0, &fds[i]) != 0) 1694 break; 1695 1696 if (i < n) { 1697 for (i--; i >= 0; i--) 1698 fdunused(fdp, fds[i]); 1699 return (EMFILE); 1700 } 1701 1702 return (0); 1703} 1704 1705/* 1706 * Check to see whether n user file descriptors are available to the process 1707 * p. 1708 */ 1709int 1710fdavail(struct thread *td, int n) 1711{ 1712 struct proc *p = td->td_proc; 1713 struct filedesc *fdp = td->td_proc->p_fd; 1714 int i, lim, last; 1715 1716 FILEDESC_LOCK_ASSERT(fdp); 1717 1718 /* 1719 * XXX: This is only called from uipc_usrreq.c:unp_externalize(); 1720 * call racct_add() from there instead of dealing with containers 1721 * here. 1722 */ 1723 lim = getmaxfd(p); 1724 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1725 return (1); 1726 last = min(fdp->fd_nfiles, lim); 1727 for (i = fdp->fd_freefile; i < last; i++) { 1728 if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0) 1729 return (1); 1730 } 1731 return (0); 1732} 1733 1734/* 1735 * Create a new open file structure and allocate a file decriptor for the 1736 * process that refers to it. We add one reference to the file for the 1737 * descriptor table and one reference for resultfp. This is to prevent us 1738 * being preempted and the entry in the descriptor table closed after we 1739 * release the FILEDESC lock. 1740 */ 1741int 1742falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags) 1743{ 1744 struct file *fp; 1745 int error, fd; 1746 1747 error = falloc_noinstall(td, &fp); 1748 if (error) 1749 return (error); /* no reference held on error */ 1750 1751 error = finstall(td, fp, &fd, flags, NULL); 1752 if (error) { 1753 fdrop(fp, td); /* one reference (fp only) */ 1754 return (error); 1755 } 1756 1757 if (resultfp != NULL) 1758 *resultfp = fp; /* copy out result */ 1759 else 1760 fdrop(fp, td); /* release local reference */ 1761 1762 if (resultfd != NULL) 1763 *resultfd = fd; 1764 1765 return (0); 1766} 1767 1768/* 1769 * Create a new open file structure without allocating a file descriptor. 1770 */ 1771int 1772falloc_noinstall(struct thread *td, struct file **resultfp) 1773{ 1774 struct file *fp; 1775 int maxuserfiles = maxfiles - (maxfiles / 20); 1776 static struct timeval lastfail; 1777 static int curfail; 1778 1779 KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__)); 1780 1781 if ((openfiles >= maxuserfiles && 1782 priv_check(td, PRIV_MAXFILES) != 0) || 1783 openfiles >= maxfiles) { 1784 if (ppsratecheck(&lastfail, &curfail, 1)) { 1785 printf("kern.maxfiles limit exceeded by uid %i, " 1786 "please see tuning(7).\n", td->td_ucred->cr_ruid); 1787 } 1788 return (ENFILE); 1789 } 1790 atomic_add_int(&openfiles, 1); 1791 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1792 refcount_init(&fp->f_count, 1); 1793 fp->f_cred = crhold(td->td_ucred); 1794 fp->f_ops = &badfileops; 1795 fp->f_data = NULL; 1796 fp->f_vnode = NULL; 1797 *resultfp = fp; 1798 return (0); 1799} 1800 1801/* 1802 * Install a file in a file descriptor table. 1803 */ 1804int 1805finstall(struct thread *td, struct file *fp, int *fd, int flags, 1806 struct filecaps *fcaps) 1807{ 1808 struct filedesc *fdp = td->td_proc->p_fd; 1809 struct filedescent *fde; 1810 int error; 1811 1812 KASSERT(fd != NULL, ("%s: fd == NULL", __func__)); 1813 KASSERT(fp != NULL, ("%s: fp == NULL", __func__)); 1814 if (fcaps != NULL) 1815 filecaps_validate(fcaps, __func__); 1816 1817 FILEDESC_XLOCK(fdp); 1818 if ((error = fdalloc(td, 0, fd))) { 1819 FILEDESC_XUNLOCK(fdp); 1820 return (error); 1821 } 1822 fhold(fp); 1823 fde = &fdp->fd_ofiles[*fd]; 1824#ifdef CAPABILITIES 1825 seq_write_begin(&fde->fde_seq); 1826#endif 1827 fde->fde_file = fp; 1828 if ((flags & O_CLOEXEC) != 0) 1829 fde->fde_flags |= UF_EXCLOSE; 1830 if (fcaps != NULL) 1831 filecaps_move(fcaps, &fde->fde_caps); 1832 else 1833 filecaps_fill(&fde->fde_caps); 1834#ifdef CAPABILITIES 1835 seq_write_end(&fde->fde_seq); 1836#endif 1837 FILEDESC_XUNLOCK(fdp); 1838 return (0); 1839} 1840 1841/* 1842 * Build a new filedesc structure from another. 1843 * Copy the current, root, and jail root vnode references. 1844 */ 1845struct filedesc * 1846fdinit(struct filedesc *fdp) 1847{ 1848 struct filedesc0 *newfdp; 1849 1850 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1851 FILEDESC_LOCK_INIT(&newfdp->fd_fd); 1852 if (fdp != NULL) { 1853 FILEDESC_SLOCK(fdp); 1854 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1855 if (newfdp->fd_fd.fd_cdir) 1856 VREF(newfdp->fd_fd.fd_cdir); 1857 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1858 if (newfdp->fd_fd.fd_rdir) 1859 VREF(newfdp->fd_fd.fd_rdir); 1860 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1861 if (newfdp->fd_fd.fd_jdir) 1862 VREF(newfdp->fd_fd.fd_jdir); 1863 FILEDESC_SUNLOCK(fdp); 1864 } 1865 1866 /* Create the file descriptor table. */ 1867 newfdp->fd_fd.fd_refcnt = 1; 1868 newfdp->fd_fd.fd_holdcnt = 1; 1869 newfdp->fd_fd.fd_cmask = CMASK; 1870 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1871 newfdp->fd_fd.fd_nfiles = NDFILE; 1872 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1873 newfdp->fd_fd.fd_lastfile = -1; 1874 return (&newfdp->fd_fd); 1875} 1876 1877static struct filedesc * 1878fdhold(struct proc *p) 1879{ 1880 struct filedesc *fdp; 1881 1882 mtx_lock(&fdesc_mtx); 1883 fdp = p->p_fd; 1884 if (fdp != NULL) 1885 fdp->fd_holdcnt++; 1886 mtx_unlock(&fdesc_mtx); 1887 return (fdp); 1888} 1889 1890static void 1891fddrop(struct filedesc *fdp) 1892{ 1893 struct filedesc0 *fdp0; 1894 struct freetable *ft; 1895 int i; 1896 1897 mtx_lock(&fdesc_mtx); 1898 i = --fdp->fd_holdcnt; 1899 mtx_unlock(&fdesc_mtx); 1900 if (i > 0) 1901 return; 1902 1903 FILEDESC_LOCK_DESTROY(fdp); 1904 fdp0 = (struct filedesc0 *)fdp; 1905 while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) { 1906 SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next); 1907 free(ft->ft_table, M_FILEDESC); 1908 } 1909 free(fdp, M_FILEDESC); 1910} 1911 1912/* 1913 * Share a filedesc structure. 1914 */ 1915struct filedesc * 1916fdshare(struct filedesc *fdp) 1917{ 1918 1919 FILEDESC_XLOCK(fdp); 1920 fdp->fd_refcnt++; 1921 FILEDESC_XUNLOCK(fdp); 1922 return (fdp); 1923} 1924 1925/* 1926 * Unshare a filedesc structure, if necessary by making a copy 1927 */ 1928void 1929fdunshare(struct thread *td) 1930{ 1931 struct filedesc *tmp; 1932 struct proc *p = td->td_proc; 1933 1934 if (p->p_fd->fd_refcnt == 1) 1935 return; 1936 1937 tmp = fdcopy(p->p_fd); 1938 fdescfree(td); 1939 p->p_fd = tmp; 1940} 1941 1942/* 1943 * Copy a filedesc structure. A NULL pointer in returns a NULL reference, 1944 * this is to ease callers, not catch errors. 1945 */ 1946struct filedesc * 1947fdcopy(struct filedesc *fdp) 1948{ 1949 struct filedesc *newfdp; 1950 struct filedescent *nfde, *ofde; 1951 int i; 1952 1953 /* Certain daemons might not have file descriptors. */ 1954 if (fdp == NULL) 1955 return (NULL); 1956 1957 newfdp = fdinit(fdp); 1958 FILEDESC_SLOCK(fdp); 1959 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1960 FILEDESC_SUNLOCK(fdp); 1961 FILEDESC_XLOCK(newfdp); 1962 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1963 FILEDESC_XUNLOCK(newfdp); 1964 FILEDESC_SLOCK(fdp); 1965 } 1966 /* copy all passable descriptors (i.e. not kqueue) */ 1967 newfdp->fd_freefile = -1; 1968 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1969 ofde = &fdp->fd_ofiles[i]; 1970 if (fdisused(fdp, i) && 1971 (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) && 1972 ofde->fde_file->f_ops != &badfileops) { 1973 nfde = &newfdp->fd_ofiles[i]; 1974 *nfde = *ofde; 1975 filecaps_copy(&ofde->fde_caps, &nfde->fde_caps); 1976 fhold(nfde->fde_file); 1977 newfdp->fd_lastfile = i; 1978 } else { 1979 if (newfdp->fd_freefile == -1) 1980 newfdp->fd_freefile = i; 1981 } 1982 } 1983 newfdp->fd_cmask = fdp->fd_cmask; 1984 FILEDESC_SUNLOCK(fdp); 1985 FILEDESC_XLOCK(newfdp); 1986 for (i = 0; i <= newfdp->fd_lastfile; ++i) { 1987 if (newfdp->fd_ofiles[i].fde_file != NULL) 1988 fdused(newfdp, i); 1989 } 1990 if (newfdp->fd_freefile == -1) 1991 newfdp->fd_freefile = i; 1992 FILEDESC_XUNLOCK(newfdp); 1993 return (newfdp); 1994} 1995 1996/* 1997 * Release a filedesc structure. 1998 */ 1999void 2000fdescfree(struct thread *td) 2001{ 2002 struct filedesc *fdp; 2003 int i; 2004 struct filedesc_to_leader *fdtol; 2005 struct file *fp; 2006 struct vnode *cdir, *jdir, *rdir, *vp; 2007 struct flock lf; 2008 2009 /* Certain daemons might not have file descriptors. */ 2010 fdp = td->td_proc->p_fd; 2011 if (fdp == NULL) 2012 return; 2013 2014#ifdef RACCT 2015 if (racct_enable) { 2016 PROC_LOCK(td->td_proc); 2017 racct_set(td->td_proc, RACCT_NOFILE, 0); 2018 PROC_UNLOCK(td->td_proc); 2019 } 2020#endif 2021 2022 /* Check for special need to clear POSIX style locks */ 2023 fdtol = td->td_proc->p_fdtol; 2024 if (fdtol != NULL) { 2025 FILEDESC_XLOCK(fdp); 2026 KASSERT(fdtol->fdl_refcount > 0, 2027 ("filedesc_to_refcount botch: fdl_refcount=%d", 2028 fdtol->fdl_refcount)); 2029 if (fdtol->fdl_refcount == 1 && 2030 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 2031 for (i = 0; i <= fdp->fd_lastfile; i++) { 2032 fp = fdp->fd_ofiles[i].fde_file; 2033 if (fp == NULL || fp->f_type != DTYPE_VNODE) 2034 continue; 2035 fhold(fp); 2036 FILEDESC_XUNLOCK(fdp); 2037 lf.l_whence = SEEK_SET; 2038 lf.l_start = 0; 2039 lf.l_len = 0; 2040 lf.l_type = F_UNLCK; 2041 vp = fp->f_vnode; 2042 (void) VOP_ADVLOCK(vp, 2043 (caddr_t)td->td_proc->p_leader, F_UNLCK, 2044 &lf, F_POSIX); 2045 FILEDESC_XLOCK(fdp); 2046 fdrop(fp, td); 2047 } 2048 } 2049 retry: 2050 if (fdtol->fdl_refcount == 1) { 2051 if (fdp->fd_holdleaderscount > 0 && 2052 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 2053 /* 2054 * close() or do_dup() has cleared a reference 2055 * in a shared file descriptor table. 2056 */ 2057 fdp->fd_holdleaderswakeup = 1; 2058 sx_sleep(&fdp->fd_holdleaderscount, 2059 FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); 2060 goto retry; 2061 } 2062 if (fdtol->fdl_holdcount > 0) { 2063 /* 2064 * Ensure that fdtol->fdl_leader remains 2065 * valid in closef(). 2066 */ 2067 fdtol->fdl_wakeup = 1; 2068 sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK, 2069 "fdlhold", 0); 2070 goto retry; 2071 } 2072 } 2073 fdtol->fdl_refcount--; 2074 if (fdtol->fdl_refcount == 0 && 2075 fdtol->fdl_holdcount == 0) { 2076 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 2077 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 2078 } else 2079 fdtol = NULL; 2080 td->td_proc->p_fdtol = NULL; 2081 FILEDESC_XUNLOCK(fdp); 2082 if (fdtol != NULL) 2083 free(fdtol, M_FILEDESC_TO_LEADER); 2084 } 2085 2086 mtx_lock(&fdesc_mtx); 2087 td->td_proc->p_fd = NULL; 2088 mtx_unlock(&fdesc_mtx); 2089 2090 FILEDESC_XLOCK(fdp); 2091 i = --fdp->fd_refcnt; 2092 if (i > 0) { 2093 FILEDESC_XUNLOCK(fdp); 2094 return; 2095 } 2096 2097 cdir = fdp->fd_cdir; 2098 fdp->fd_cdir = NULL; 2099 rdir = fdp->fd_rdir; 2100 fdp->fd_rdir = NULL; 2101 jdir = fdp->fd_jdir; 2102 fdp->fd_jdir = NULL; 2103 FILEDESC_XUNLOCK(fdp); 2104 2105 for (i = 0; i <= fdp->fd_lastfile; i++) { 2106 fp = fdp->fd_ofiles[i].fde_file; 2107 if (fp != NULL) { 2108 fdfree_last(fdp, i); 2109 (void) closef(fp, td); 2110 } 2111 } 2112 2113 if (fdp->fd_nfiles > NDFILE) 2114 free(fdp->fd_ofiles, M_FILEDESC); 2115 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 2116 free(fdp->fd_map, M_FILEDESC); 2117 2118 if (cdir != NULL) 2119 vrele(cdir); 2120 if (rdir != NULL) 2121 vrele(rdir); 2122 if (jdir != NULL) 2123 vrele(jdir); 2124 2125 fddrop(fdp); 2126} 2127 2128/* 2129 * For setugid programs, we don't want to people to use that setugidness 2130 * to generate error messages which write to a file which otherwise would 2131 * otherwise be off-limits to the process. We check for filesystems where 2132 * the vnode can change out from under us after execve (like [lin]procfs). 2133 * 2134 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 2135 * sufficient. We also don't check for setugidness since we know we are. 2136 */ 2137static int 2138is_unsafe(struct file *fp) 2139{ 2140 if (fp->f_type == DTYPE_VNODE) { 2141 struct vnode *vp = fp->f_vnode; 2142 2143 if ((vp->v_vflag & VV_PROCDEP) != 0) 2144 return (1); 2145 } 2146 return (0); 2147} 2148 2149/* 2150 * Make this setguid thing safe, if at all possible. 2151 */ 2152void 2153setugidsafety(struct thread *td) 2154{ 2155 struct filedesc *fdp; 2156 struct file *fp; 2157 int i; 2158 2159 fdp = td->td_proc->p_fd; 2160 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 2161 FILEDESC_XLOCK(fdp); 2162 for (i = 0; i <= fdp->fd_lastfile; i++) { 2163 if (i > 2) 2164 break; 2165 fp = fdp->fd_ofiles[i].fde_file; 2166 if (fp != NULL && is_unsafe(fp)) { 2167 knote_fdclose(td, i); 2168 /* 2169 * NULL-out descriptor prior to close to avoid 2170 * a race while close blocks. 2171 */ 2172 fdfree(fdp, i); 2173 FILEDESC_XUNLOCK(fdp); 2174 (void) closef(fp, td); 2175 FILEDESC_XLOCK(fdp); 2176 } 2177 } 2178 FILEDESC_XUNLOCK(fdp); 2179} 2180 2181/* 2182 * If a specific file object occupies a specific file descriptor, close the 2183 * file descriptor entry and drop a reference on the file object. This is a 2184 * convenience function to handle a subsequent error in a function that calls 2185 * falloc() that handles the race that another thread might have closed the 2186 * file descriptor out from under the thread creating the file object. 2187 */ 2188void 2189fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 2190{ 2191 2192 FILEDESC_XLOCK(fdp); 2193 if (fdp->fd_ofiles[idx].fde_file == fp) { 2194 fdfree(fdp, idx); 2195 FILEDESC_XUNLOCK(fdp); 2196 fdrop(fp, td); 2197 } else 2198 FILEDESC_XUNLOCK(fdp); 2199} 2200 2201/* 2202 * Close any files on exec? 2203 */ 2204void 2205fdcloseexec(struct thread *td) 2206{ 2207 struct filedesc *fdp; 2208 struct filedescent *fde; 2209 struct file *fp; 2210 int i; 2211 2212 fdp = td->td_proc->p_fd; 2213 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 2214 FILEDESC_XLOCK(fdp); 2215 for (i = 0; i <= fdp->fd_lastfile; i++) { 2216 fde = &fdp->fd_ofiles[i]; 2217 fp = fde->fde_file; 2218 if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || 2219 (fde->fde_flags & UF_EXCLOSE))) { 2220 fdfree(fdp, i); 2221 (void) closefp(fdp, i, fp, td, 0); 2222 /* closefp() drops the FILEDESC lock. */ 2223 FILEDESC_XLOCK(fdp); 2224 } 2225 } 2226 FILEDESC_XUNLOCK(fdp); 2227} 2228 2229/* 2230 * It is unsafe for set[ug]id processes to be started with file 2231 * descriptors 0..2 closed, as these descriptors are given implicit 2232 * significance in the Standard C library. fdcheckstd() will create a 2233 * descriptor referencing /dev/null for each of stdin, stdout, and 2234 * stderr that is not already open. 2235 */ 2236int 2237fdcheckstd(struct thread *td) 2238{ 2239 struct filedesc *fdp; 2240 register_t retval, save; 2241 int i, error, devnull; 2242 2243 fdp = td->td_proc->p_fd; 2244 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 2245 devnull = -1; 2246 error = 0; 2247 for (i = 0; i < 3; i++) { 2248 if (fdp->fd_ofiles[i].fde_file != NULL) 2249 continue; 2250 if (devnull < 0) { 2251 save = td->td_retval[0]; 2252 error = kern_open(td, "/dev/null", UIO_SYSSPACE, 2253 O_RDWR, 0); 2254 devnull = td->td_retval[0]; 2255 td->td_retval[0] = save; 2256 if (error) 2257 break; 2258 KASSERT(devnull == i, ("oof, we didn't get our fd")); 2259 } else { 2260 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 2261 if (error != 0) 2262 break; 2263 } 2264 } 2265 return (error); 2266} 2267 2268/* 2269 * Internal form of close. Decrement reference count on file structure. 2270 * Note: td may be NULL when closing a file that was being passed in a 2271 * message. 2272 * 2273 * XXXRW: Giant is not required for the caller, but often will be held; this 2274 * makes it moderately likely the Giant will be recursed in the VFS case. 2275 */ 2276int 2277closef(struct file *fp, struct thread *td) 2278{ 2279 struct vnode *vp; 2280 struct flock lf; 2281 struct filedesc_to_leader *fdtol; 2282 struct filedesc *fdp; 2283 2284 /* 2285 * POSIX record locking dictates that any close releases ALL 2286 * locks owned by this process. This is handled by setting 2287 * a flag in the unlock to free ONLY locks obeying POSIX 2288 * semantics, and not to free BSD-style file locks. 2289 * If the descriptor was in a message, POSIX-style locks 2290 * aren't passed with the descriptor, and the thread pointer 2291 * will be NULL. Callers should be careful only to pass a 2292 * NULL thread pointer when there really is no owning 2293 * context that might have locks, or the locks will be 2294 * leaked. 2295 */ 2296 if (fp->f_type == DTYPE_VNODE && td != NULL) { 2297 vp = fp->f_vnode; 2298 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 2299 lf.l_whence = SEEK_SET; 2300 lf.l_start = 0; 2301 lf.l_len = 0; 2302 lf.l_type = F_UNLCK; 2303 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 2304 F_UNLCK, &lf, F_POSIX); 2305 } 2306 fdtol = td->td_proc->p_fdtol; 2307 if (fdtol != NULL) { 2308 /* 2309 * Handle special case where file descriptor table is 2310 * shared between multiple process leaders. 2311 */ 2312 fdp = td->td_proc->p_fd; 2313 FILEDESC_XLOCK(fdp); 2314 for (fdtol = fdtol->fdl_next; 2315 fdtol != td->td_proc->p_fdtol; 2316 fdtol = fdtol->fdl_next) { 2317 if ((fdtol->fdl_leader->p_flag & 2318 P_ADVLOCK) == 0) 2319 continue; 2320 fdtol->fdl_holdcount++; 2321 FILEDESC_XUNLOCK(fdp); 2322 lf.l_whence = SEEK_SET; 2323 lf.l_start = 0; 2324 lf.l_len = 0; 2325 lf.l_type = F_UNLCK; 2326 vp = fp->f_vnode; 2327 (void) VOP_ADVLOCK(vp, 2328 (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, 2329 F_POSIX); 2330 FILEDESC_XLOCK(fdp); 2331 fdtol->fdl_holdcount--; 2332 if (fdtol->fdl_holdcount == 0 && 2333 fdtol->fdl_wakeup != 0) { 2334 fdtol->fdl_wakeup = 0; 2335 wakeup(fdtol); 2336 } 2337 } 2338 FILEDESC_XUNLOCK(fdp); 2339 } 2340 } 2341 return (fdrop(fp, td)); 2342} 2343 2344/* 2345 * Initialize the file pointer with the specified properties. 2346 * 2347 * The ops are set with release semantics to be certain that the flags, type, 2348 * and data are visible when ops is. This is to prevent ops methods from being 2349 * called with bad data. 2350 */ 2351void 2352finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) 2353{ 2354 fp->f_data = data; 2355 fp->f_flag = flag; 2356 fp->f_type = type; 2357 atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); 2358} 2359 2360int 2361fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, 2362 int needfcntl, struct file **fpp, cap_rights_t *haverightsp) 2363{ 2364#ifdef CAPABILITIES 2365 struct filedescent fde; 2366#endif 2367 struct file *fp; 2368 u_int count; 2369#ifdef CAPABILITIES 2370 seq_t seq; 2371 cap_rights_t haverights; 2372 int error; 2373#endif 2374 2375 /* 2376 * Avoid reads reordering and then a first access to the 2377 * fdp->fd_ofiles table which could result in OOB operation. 2378 */ 2379 if (fd < 0 || fd >= atomic_load_acq_int(&fdp->fd_nfiles)) 2380 return (EBADF); 2381 /* 2382 * Fetch the descriptor locklessly. We avoid fdrop() races by 2383 * never raising a refcount above 0. To accomplish this we have 2384 * to use a cmpset loop rather than an atomic_add. The descriptor 2385 * must be re-verified once we acquire a reference to be certain 2386 * that the identity is still correct and we did not lose a race 2387 * due to preemption. 2388 */ 2389 for (;;) { 2390#ifdef CAPABILITIES 2391 seq = seq_read(fd_seq(fdp, fd)); 2392 fde = fdp->fd_ofiles[fd]; 2393 if (!seq_consistent(fd_seq(fdp, fd), seq)) { 2394 cpu_spinwait(); 2395 continue; 2396 } 2397 fp = fde.fde_file; 2398#else 2399 fp = fdp->fd_ofiles[fd].fde_file; 2400#endif 2401 if (fp == NULL) 2402 return (EBADF); 2403#ifdef CAPABILITIES 2404 haverights = *cap_rights_fde(&fde); 2405 if (needrightsp != NULL) { 2406 error = cap_check(&haverights, needrightsp); 2407 if (error != 0) 2408 return (error); 2409 if (cap_rights_is_set(needrightsp, CAP_FCNTL)) { 2410 error = cap_fcntl_check_fde(&fde, needfcntl); 2411 if (error != 0) 2412 return (error); 2413 } 2414 } 2415#endif 2416 count = fp->f_count; 2417 if (count == 0) 2418 continue; 2419 /* 2420 * Use an acquire barrier to prevent caching of fd_ofiles 2421 * so it is refreshed for verification. 2422 */ 2423 if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) 2424 continue; 2425#ifdef CAPABILITIES 2426 if (seq_consistent_nomb(fd_seq(fdp, fd), seq)) 2427#else 2428 if (fp == fdp->fd_ofiles[fd].fde_file) 2429#endif 2430 break; 2431 fdrop(fp, curthread); 2432 } 2433 *fpp = fp; 2434 if (haverightsp != NULL) { 2435#ifdef CAPABILITIES 2436 *haverightsp = haverights; 2437#else 2438 CAP_ALL(haverightsp); 2439#endif 2440 } 2441 return (0); 2442} 2443 2444/* 2445 * Extract the file pointer associated with the specified descriptor for the 2446 * current user process. 2447 * 2448 * If the descriptor doesn't exist or doesn't match 'flags', EBADF is 2449 * returned. 2450 * 2451 * File's rights will be checked against the capability rights mask. 2452 * 2453 * If an error occured the non-zero error is returned and *fpp is set to 2454 * NULL. Otherwise *fpp is held and set and zero is returned. Caller is 2455 * responsible for fdrop(). 2456 */ 2457static __inline int 2458_fget(struct thread *td, int fd, struct file **fpp, int flags, 2459 cap_rights_t *needrightsp, u_char *maxprotp) 2460{ 2461 struct filedesc *fdp; 2462 struct file *fp; 2463 cap_rights_t haverights, needrights; 2464 int error; 2465 2466 *fpp = NULL; 2467 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 2468 return (EBADF); 2469 if (needrightsp != NULL) 2470 needrights = *needrightsp; 2471 else 2472 cap_rights_init(&needrights); 2473 if (maxprotp != NULL) 2474 cap_rights_set(&needrights, CAP_MMAP); 2475 error = fget_unlocked(fdp, fd, &needrights, 0, &fp, &haverights); 2476 if (error != 0) 2477 return (error); 2478 if (fp->f_ops == &badfileops) { 2479 fdrop(fp, td); 2480 return (EBADF); 2481 } 2482 2483#ifdef CAPABILITIES 2484 /* 2485 * If requested, convert capability rights to access flags. 2486 */ 2487 if (maxprotp != NULL) 2488 *maxprotp = cap_rights_to_vmprot(&haverights); 2489#else /* !CAPABILITIES */ 2490 if (maxprotp != NULL) 2491 *maxprotp = VM_PROT_ALL; 2492#endif /* CAPABILITIES */ 2493 2494 /* 2495 * FREAD and FWRITE failure return EBADF as per POSIX. 2496 */ 2497 error = 0; 2498 switch (flags) { 2499 case FREAD: 2500 case FWRITE: 2501 if ((fp->f_flag & flags) == 0) 2502 error = EBADF; 2503 break; 2504 case FEXEC: 2505 if ((fp->f_flag & (FREAD | FEXEC)) == 0 || 2506 ((fp->f_flag & FWRITE) != 0)) 2507 error = EBADF; 2508 break; 2509 case 0: 2510 break; 2511 default: 2512 KASSERT(0, ("wrong flags")); 2513 } 2514 2515 if (error != 0) { 2516 fdrop(fp, td); 2517 return (error); 2518 } 2519 2520 *fpp = fp; 2521 return (0); 2522} 2523 2524int 2525fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 2526{ 2527 2528 return(_fget(td, fd, fpp, 0, rightsp, NULL)); 2529} 2530 2531int 2532fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp, u_char *maxprotp, 2533 struct file **fpp) 2534{ 2535 2536 return (_fget(td, fd, fpp, 0, rightsp, maxprotp)); 2537} 2538 2539int 2540fget_read(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 2541{ 2542 2543 return(_fget(td, fd, fpp, FREAD, rightsp, NULL)); 2544} 2545 2546int 2547fget_write(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 2548{ 2549 2550 return (_fget(td, fd, fpp, FWRITE, rightsp, NULL)); 2551} 2552 2553/* 2554 * Like fget() but loads the underlying vnode, or returns an error if the 2555 * descriptor does not represent a vnode. Note that pipes use vnodes but 2556 * never have VM objects. The returned vnode will be vref()'d. 2557 * 2558 * XXX: what about the unused flags ? 2559 */ 2560static __inline int 2561_fgetvp(struct thread *td, int fd, int flags, cap_rights_t *needrightsp, 2562 struct vnode **vpp) 2563{ 2564 struct file *fp; 2565 int error; 2566 2567 *vpp = NULL; 2568 error = _fget(td, fd, &fp, flags, needrightsp, NULL); 2569 if (error != 0) 2570 return (error); 2571 if (fp->f_vnode == NULL) { 2572 error = EINVAL; 2573 } else { 2574 *vpp = fp->f_vnode; 2575 vref(*vpp); 2576 } 2577 fdrop(fp, td); 2578 2579 return (error); 2580} 2581 2582int 2583fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) 2584{ 2585 2586 return (_fgetvp(td, fd, 0, rightsp, vpp)); 2587} 2588 2589int 2590fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp, 2591 struct filecaps *havecaps, struct vnode **vpp) 2592{ 2593 struct filedesc *fdp; 2594 struct file *fp; 2595#ifdef CAPABILITIES 2596 int error; 2597#endif 2598 2599 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 2600 return (EBADF); 2601 2602 fp = fget_locked(fdp, fd); 2603 if (fp == NULL || fp->f_ops == &badfileops) 2604 return (EBADF); 2605 2606#ifdef CAPABILITIES 2607 if (needrightsp != NULL) { 2608 error = cap_check(cap_rights(fdp, fd), needrightsp); 2609 if (error != 0) 2610 return (error); 2611 } 2612#endif 2613 2614 if (fp->f_vnode == NULL) 2615 return (EINVAL); 2616 2617 *vpp = fp->f_vnode; 2618 vref(*vpp); 2619 filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecaps); 2620 2621 return (0); 2622} 2623 2624int 2625fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) 2626{ 2627 2628 return (_fgetvp(td, fd, FREAD, rightsp, vpp)); 2629} 2630 2631int 2632fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) 2633{ 2634 2635 return (_fgetvp(td, fd, FEXEC, rightsp, vpp)); 2636} 2637 2638#ifdef notyet 2639int 2640fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp, 2641 struct vnode **vpp) 2642{ 2643 2644 return (_fgetvp(td, fd, FWRITE, rightsp, vpp)); 2645} 2646#endif 2647 2648/* 2649 * Like fget() but loads the underlying socket, or returns an error if the 2650 * descriptor does not represent a socket. 2651 * 2652 * We bump the ref count on the returned socket. XXX Also obtain the SX lock 2653 * in the future. 2654 * 2655 * Note: fgetsock() and fputsock() are deprecated, as consumers should rely 2656 * on their file descriptor reference to prevent the socket from being free'd 2657 * during use. 2658 */ 2659int 2660fgetsock(struct thread *td, int fd, cap_rights_t *rightsp, struct socket **spp, 2661 u_int *fflagp) 2662{ 2663 struct file *fp; 2664 int error; 2665 2666 *spp = NULL; 2667 if (fflagp != NULL) 2668 *fflagp = 0; 2669 if ((error = _fget(td, fd, &fp, 0, rightsp, NULL)) != 0) 2670 return (error); 2671 if (fp->f_type != DTYPE_SOCKET) { 2672 error = ENOTSOCK; 2673 } else { 2674 *spp = fp->f_data; 2675 if (fflagp) 2676 *fflagp = fp->f_flag; 2677 SOCK_LOCK(*spp); 2678 soref(*spp); 2679 SOCK_UNLOCK(*spp); 2680 } 2681 fdrop(fp, td); 2682 2683 return (error); 2684} 2685 2686/* 2687 * Drop the reference count on the socket and XXX release the SX lock in the 2688 * future. The last reference closes the socket. 2689 * 2690 * Note: fputsock() is deprecated, see comment for fgetsock(). 2691 */ 2692void 2693fputsock(struct socket *so) 2694{ 2695 2696 ACCEPT_LOCK(); 2697 SOCK_LOCK(so); 2698 CURVNET_SET(so->so_vnet); 2699 sorele(so); 2700 CURVNET_RESTORE(); 2701} 2702 2703/* 2704 * Handle the last reference to a file being closed. 2705 */ 2706int 2707_fdrop(struct file *fp, struct thread *td) 2708{ 2709 int error; 2710 2711 error = 0; 2712 if (fp->f_count != 0) 2713 panic("fdrop: count %d", fp->f_count); 2714 if (fp->f_ops != &badfileops) 2715 error = fo_close(fp, td); 2716 atomic_subtract_int(&openfiles, 1); 2717 crfree(fp->f_cred); 2718 free(fp->f_advice, M_FADVISE); 2719 uma_zfree(file_zone, fp); 2720 2721 return (error); 2722} 2723 2724/* 2725 * Apply an advisory lock on a file descriptor. 2726 * 2727 * Just attempt to get a record lock of the requested type on the entire file 2728 * (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2729 */ 2730#ifndef _SYS_SYSPROTO_H_ 2731struct flock_args { 2732 int fd; 2733 int how; 2734}; 2735#endif 2736/* ARGSUSED */ 2737int 2738sys_flock(struct thread *td, struct flock_args *uap) 2739{ 2740 struct file *fp; 2741 struct vnode *vp; 2742 struct flock lf; 2743 cap_rights_t rights; 2744 int error; 2745 2746 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FLOCK), &fp); 2747 if (error != 0) 2748 return (error); 2749 if (fp->f_type != DTYPE_VNODE) { 2750 fdrop(fp, td); 2751 return (EOPNOTSUPP); 2752 } 2753 2754 vp = fp->f_vnode; 2755 lf.l_whence = SEEK_SET; 2756 lf.l_start = 0; 2757 lf.l_len = 0; 2758 if (uap->how & LOCK_UN) { 2759 lf.l_type = F_UNLCK; 2760 atomic_clear_int(&fp->f_flag, FHASLOCK); 2761 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2762 goto done2; 2763 } 2764 if (uap->how & LOCK_EX) 2765 lf.l_type = F_WRLCK; 2766 else if (uap->how & LOCK_SH) 2767 lf.l_type = F_RDLCK; 2768 else { 2769 error = EBADF; 2770 goto done2; 2771 } 2772 atomic_set_int(&fp->f_flag, FHASLOCK); 2773 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2774 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2775done2: 2776 fdrop(fp, td); 2777 return (error); 2778} 2779/* 2780 * Duplicate the specified descriptor to a free descriptor. 2781 */ 2782int 2783dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, 2784 int openerror, int *indxp) 2785{ 2786 struct filedescent *newfde, *oldfde; 2787 struct file *fp; 2788 int error, indx; 2789 2790 KASSERT(openerror == ENODEV || openerror == ENXIO, 2791 ("unexpected error %d in %s", openerror, __func__)); 2792 2793 /* 2794 * If the to-be-dup'd fd number is greater than the allowed number 2795 * of file descriptors, or the fd to be dup'd has already been 2796 * closed, then reject. 2797 */ 2798 FILEDESC_XLOCK(fdp); 2799 if ((fp = fget_locked(fdp, dfd)) == NULL) { 2800 FILEDESC_XUNLOCK(fdp); 2801 return (EBADF); 2802 } 2803 2804 error = fdalloc(td, 0, &indx); 2805 if (error != 0) { 2806 FILEDESC_XUNLOCK(fdp); 2807 return (error); 2808 } 2809 2810 /* 2811 * There are two cases of interest here. 2812 * 2813 * For ENODEV simply dup (dfd) to file descriptor (indx) and return. 2814 * 2815 * For ENXIO steal away the file structure from (dfd) and store it in 2816 * (indx). (dfd) is effectively closed by this operation. 2817 */ 2818 switch (openerror) { 2819 case ENODEV: 2820 /* 2821 * Check that the mode the file is being opened for is a 2822 * subset of the mode of the existing descriptor. 2823 */ 2824 if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { 2825 fdunused(fdp, indx); 2826 FILEDESC_XUNLOCK(fdp); 2827 return (EACCES); 2828 } 2829 fhold(fp); 2830 newfde = &fdp->fd_ofiles[indx]; 2831 oldfde = &fdp->fd_ofiles[dfd]; 2832#ifdef CAPABILITIES 2833 seq_write_begin(&newfde->fde_seq); 2834#endif 2835 memcpy(newfde, oldfde, fde_change_size); 2836 filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); 2837#ifdef CAPABILITIES 2838 seq_write_end(&newfde->fde_seq); 2839#endif 2840 break; 2841 case ENXIO: 2842 /* 2843 * Steal away the file pointer from dfd and stuff it into indx. 2844 */ 2845 newfde = &fdp->fd_ofiles[indx]; 2846 oldfde = &fdp->fd_ofiles[dfd]; 2847#ifdef CAPABILITIES 2848 seq_write_begin(&newfde->fde_seq); 2849#endif 2850 memcpy(newfde, oldfde, fde_change_size); 2851 bzero(oldfde, fde_change_size); 2852 fdunused(fdp, dfd); 2853#ifdef CAPABILITIES 2854 seq_write_end(&newfde->fde_seq); 2855#endif 2856 break; 2857 } 2858 FILEDESC_XUNLOCK(fdp); 2859 *indxp = indx; 2860 return (0); 2861} 2862 2863/* 2864 * Scan all active processes and prisons to see if any of them have a current 2865 * or root directory of `olddp'. If so, replace them with the new mount point. 2866 */ 2867void 2868mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2869{ 2870 struct filedesc *fdp; 2871 struct prison *pr; 2872 struct proc *p; 2873 int nrele; 2874 2875 if (vrefcnt(olddp) == 1) 2876 return; 2877 nrele = 0; 2878 sx_slock(&allproc_lock); 2879 FOREACH_PROC_IN_SYSTEM(p) { 2880 fdp = fdhold(p); 2881 if (fdp == NULL) 2882 continue; 2883 FILEDESC_XLOCK(fdp); 2884 if (fdp->fd_cdir == olddp) { 2885 vref(newdp); 2886 fdp->fd_cdir = newdp; 2887 nrele++; 2888 } 2889 if (fdp->fd_rdir == olddp) { 2890 vref(newdp); 2891 fdp->fd_rdir = newdp; 2892 nrele++; 2893 } 2894 if (fdp->fd_jdir == olddp) { 2895 vref(newdp); 2896 fdp->fd_jdir = newdp; 2897 nrele++; 2898 } 2899 FILEDESC_XUNLOCK(fdp); 2900 fddrop(fdp); 2901 } 2902 sx_sunlock(&allproc_lock); 2903 if (rootvnode == olddp) { 2904 vref(newdp); 2905 rootvnode = newdp; 2906 nrele++; 2907 } 2908 mtx_lock(&prison0.pr_mtx); 2909 if (prison0.pr_root == olddp) { 2910 vref(newdp); 2911 prison0.pr_root = newdp; 2912 nrele++; 2913 } 2914 mtx_unlock(&prison0.pr_mtx); 2915 sx_slock(&allprison_lock); 2916 TAILQ_FOREACH(pr, &allprison, pr_list) { 2917 mtx_lock(&pr->pr_mtx); 2918 if (pr->pr_root == olddp) { 2919 vref(newdp); 2920 pr->pr_root = newdp; 2921 nrele++; 2922 } 2923 mtx_unlock(&pr->pr_mtx); 2924 } 2925 sx_sunlock(&allprison_lock); 2926 while (nrele--) 2927 vrele(olddp); 2928} 2929 2930struct filedesc_to_leader * 2931filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2932{ 2933 struct filedesc_to_leader *fdtol; 2934 2935 fdtol = malloc(sizeof(struct filedesc_to_leader), 2936 M_FILEDESC_TO_LEADER, 2937 M_WAITOK); 2938 fdtol->fdl_refcount = 1; 2939 fdtol->fdl_holdcount = 0; 2940 fdtol->fdl_wakeup = 0; 2941 fdtol->fdl_leader = leader; 2942 if (old != NULL) { 2943 FILEDESC_XLOCK(fdp); 2944 fdtol->fdl_next = old->fdl_next; 2945 fdtol->fdl_prev = old; 2946 old->fdl_next = fdtol; 2947 fdtol->fdl_next->fdl_prev = fdtol; 2948 FILEDESC_XUNLOCK(fdp); 2949 } else { 2950 fdtol->fdl_next = fdtol; 2951 fdtol->fdl_prev = fdtol; 2952 } 2953 return (fdtol); 2954} 2955 2956/* 2957 * Get file structures globally. 2958 */ 2959static int 2960sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2961{ 2962 struct xfile xf; 2963 struct filedesc *fdp; 2964 struct file *fp; 2965 struct proc *p; 2966 int error, n; 2967 2968 error = sysctl_wire_old_buffer(req, 0); 2969 if (error != 0) 2970 return (error); 2971 if (req->oldptr == NULL) { 2972 n = 0; 2973 sx_slock(&allproc_lock); 2974 FOREACH_PROC_IN_SYSTEM(p) { 2975 if (p->p_state == PRS_NEW) 2976 continue; 2977 fdp = fdhold(p); 2978 if (fdp == NULL) 2979 continue; 2980 /* overestimates sparse tables. */ 2981 if (fdp->fd_lastfile > 0) 2982 n += fdp->fd_lastfile; 2983 fddrop(fdp); 2984 } 2985 sx_sunlock(&allproc_lock); 2986 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2987 } 2988 error = 0; 2989 bzero(&xf, sizeof(xf)); 2990 xf.xf_size = sizeof(xf); 2991 sx_slock(&allproc_lock); 2992 FOREACH_PROC_IN_SYSTEM(p) { 2993 PROC_LOCK(p); 2994 if (p->p_state == PRS_NEW) { 2995 PROC_UNLOCK(p); 2996 continue; 2997 } 2998 if (p_cansee(req->td, p) != 0) { 2999 PROC_UNLOCK(p); 3000 continue; 3001 } 3002 xf.xf_pid = p->p_pid; 3003 xf.xf_uid = p->p_ucred->cr_uid; 3004 PROC_UNLOCK(p); 3005 fdp = fdhold(p); 3006 if (fdp == NULL) 3007 continue; 3008 FILEDESC_SLOCK(fdp); 3009 for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) { 3010 if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) 3011 continue; 3012 xf.xf_fd = n; 3013 xf.xf_file = fp; 3014 xf.xf_data = fp->f_data; 3015 xf.xf_vnode = fp->f_vnode; 3016 xf.xf_type = fp->f_type; 3017 xf.xf_count = fp->f_count; 3018 xf.xf_msgcount = 0; 3019 xf.xf_offset = foffset_get(fp); 3020 xf.xf_flag = fp->f_flag; 3021 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 3022 if (error) 3023 break; 3024 } 3025 FILEDESC_SUNLOCK(fdp); 3026 fddrop(fdp); 3027 if (error) 3028 break; 3029 } 3030 sx_sunlock(&allproc_lock); 3031 return (error); 3032} 3033 3034SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE, 3035 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 3036 3037#ifdef KINFO_OFILE_SIZE 3038CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE); 3039#endif 3040 3041#ifdef COMPAT_FREEBSD7 3042static int 3043export_vnode_for_osysctl(struct vnode *vp, int type, 3044 struct kinfo_ofile *kif, struct filedesc *fdp, struct sysctl_req *req) 3045{ 3046 int error; 3047 char *fullpath, *freepath; 3048 3049 bzero(kif, sizeof(*kif)); 3050 kif->kf_structsize = sizeof(*kif); 3051 3052 vref(vp); 3053 kif->kf_fd = type; 3054 kif->kf_type = KF_TYPE_VNODE; 3055 /* This function only handles directories. */ 3056 if (vp->v_type != VDIR) { 3057 vrele(vp); 3058 return (ENOTDIR); 3059 } 3060 kif->kf_vnode_type = KF_VTYPE_VDIR; 3061 3062 /* 3063 * This is not a true file descriptor, so we set a bogus refcount 3064 * and offset to indicate these fields should be ignored. 3065 */ 3066 kif->kf_ref_count = -1; 3067 kif->kf_offset = -1; 3068 3069 freepath = NULL; 3070 fullpath = "-"; 3071 FILEDESC_SUNLOCK(fdp); 3072 vn_fullpath(curthread, vp, &fullpath, &freepath); 3073 vrele(vp); 3074 strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path)); 3075 if (freepath != NULL) 3076 free(freepath, M_TEMP); 3077 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 3078 FILEDESC_SLOCK(fdp); 3079 return (error); 3080} 3081 3082/* 3083 * Get per-process file descriptors for use by procstat(1), et al. 3084 */ 3085static int 3086sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) 3087{ 3088 char *fullpath, *freepath; 3089 struct kinfo_ofile *kif; 3090 struct filedesc *fdp; 3091 int error, i, *name; 3092 struct shmfd *shmfd; 3093 struct socket *so; 3094 struct vnode *vp; 3095 struct ksem *ks; 3096 struct file *fp; 3097 struct proc *p; 3098 struct tty *tp; 3099 3100 name = (int *)arg1; 3101 error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); 3102 if (error != 0) 3103 return (error); 3104 fdp = fdhold(p); 3105 PROC_UNLOCK(p); 3106 if (fdp == NULL) 3107 return (ENOENT); 3108 kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); 3109 FILEDESC_SLOCK(fdp); 3110 if (fdp->fd_cdir != NULL) 3111 export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, 3112 fdp, req); 3113 if (fdp->fd_rdir != NULL) 3114 export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, 3115 fdp, req); 3116 if (fdp->fd_jdir != NULL) 3117 export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, 3118 fdp, req); 3119 for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { 3120 if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) 3121 continue; 3122 bzero(kif, sizeof(*kif)); 3123 kif->kf_structsize = sizeof(*kif); 3124 ks = NULL; 3125 vp = NULL; 3126 so = NULL; 3127 tp = NULL; 3128 shmfd = NULL; 3129 kif->kf_fd = i; 3130 3131 switch (fp->f_type) { 3132 case DTYPE_VNODE: 3133 kif->kf_type = KF_TYPE_VNODE; 3134 vp = fp->f_vnode; 3135 break; 3136 3137 case DTYPE_SOCKET: 3138 kif->kf_type = KF_TYPE_SOCKET; 3139 so = fp->f_data; 3140 break; 3141 3142 case DTYPE_PIPE: 3143 kif->kf_type = KF_TYPE_PIPE; 3144 break; 3145 3146 case DTYPE_FIFO: 3147 kif->kf_type = KF_TYPE_FIFO; 3148 vp = fp->f_vnode; 3149 break; 3150 3151 case DTYPE_KQUEUE: 3152 kif->kf_type = KF_TYPE_KQUEUE; 3153 break; 3154 3155 case DTYPE_CRYPTO: 3156 kif->kf_type = KF_TYPE_CRYPTO; 3157 break; 3158 3159 case DTYPE_MQUEUE: 3160 kif->kf_type = KF_TYPE_MQUEUE; 3161 break; 3162 3163 case DTYPE_SHM: 3164 kif->kf_type = KF_TYPE_SHM; 3165 shmfd = fp->f_data; 3166 break; 3167 3168 case DTYPE_SEM: 3169 kif->kf_type = KF_TYPE_SEM; 3170 ks = fp->f_data; 3171 break; 3172 3173 case DTYPE_PTS: 3174 kif->kf_type = KF_TYPE_PTS; 3175 tp = fp->f_data; 3176 break; 3177 3178#ifdef PROCDESC 3179 case DTYPE_PROCDESC: 3180 kif->kf_type = KF_TYPE_PROCDESC; 3181 break; 3182#endif 3183 3184 default: 3185 kif->kf_type = KF_TYPE_UNKNOWN; 3186 break; 3187 } 3188 kif->kf_ref_count = fp->f_count; 3189 if (fp->f_flag & FREAD) 3190 kif->kf_flags |= KF_FLAG_READ; 3191 if (fp->f_flag & FWRITE) 3192 kif->kf_flags |= KF_FLAG_WRITE; 3193 if (fp->f_flag & FAPPEND) 3194 kif->kf_flags |= KF_FLAG_APPEND; 3195 if (fp->f_flag & FASYNC) 3196 kif->kf_flags |= KF_FLAG_ASYNC; 3197 if (fp->f_flag & FFSYNC) 3198 kif->kf_flags |= KF_FLAG_FSYNC; 3199 if (fp->f_flag & FNONBLOCK) 3200 kif->kf_flags |= KF_FLAG_NONBLOCK; 3201 if (fp->f_flag & O_DIRECT) 3202 kif->kf_flags |= KF_FLAG_DIRECT; 3203 if (fp->f_flag & FHASLOCK) 3204 kif->kf_flags |= KF_FLAG_HASLOCK; 3205 kif->kf_offset = foffset_get(fp); 3206 if (vp != NULL) { 3207 vref(vp); 3208 switch (vp->v_type) { 3209 case VNON: 3210 kif->kf_vnode_type = KF_VTYPE_VNON; 3211 break; 3212 case VREG: 3213 kif->kf_vnode_type = KF_VTYPE_VREG; 3214 break; 3215 case VDIR: 3216 kif->kf_vnode_type = KF_VTYPE_VDIR; 3217 break; 3218 case VBLK: 3219 kif->kf_vnode_type = KF_VTYPE_VBLK; 3220 break; 3221 case VCHR: 3222 kif->kf_vnode_type = KF_VTYPE_VCHR; 3223 break; 3224 case VLNK: 3225 kif->kf_vnode_type = KF_VTYPE_VLNK; 3226 break; 3227 case VSOCK: 3228 kif->kf_vnode_type = KF_VTYPE_VSOCK; 3229 break; 3230 case VFIFO: 3231 kif->kf_vnode_type = KF_VTYPE_VFIFO; 3232 break; 3233 case VBAD: 3234 kif->kf_vnode_type = KF_VTYPE_VBAD; 3235 break; 3236 default: 3237 kif->kf_vnode_type = KF_VTYPE_UNKNOWN; 3238 break; 3239 } 3240 /* 3241 * It is OK to drop the filedesc lock here as we will 3242 * re-validate and re-evaluate its properties when 3243 * the loop continues. 3244 */ 3245 freepath = NULL; 3246 fullpath = "-"; 3247 FILEDESC_SUNLOCK(fdp); 3248 vn_fullpath(curthread, vp, &fullpath, &freepath); 3249 vrele(vp); 3250 strlcpy(kif->kf_path, fullpath, 3251 sizeof(kif->kf_path)); 3252 if (freepath != NULL) 3253 free(freepath, M_TEMP); 3254 FILEDESC_SLOCK(fdp); 3255 } 3256 if (so != NULL) { 3257 struct sockaddr *sa; 3258 3259 if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa) 3260 == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) { 3261 bcopy(sa, &kif->kf_sa_local, sa->sa_len); 3262 free(sa, M_SONAME); 3263 } 3264 if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa) 3265 == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { 3266 bcopy(sa, &kif->kf_sa_peer, sa->sa_len); 3267 free(sa, M_SONAME); 3268 } 3269 kif->kf_sock_domain = 3270 so->so_proto->pr_domain->dom_family; 3271 kif->kf_sock_type = so->so_type; 3272 kif->kf_sock_protocol = so->so_proto->pr_protocol; 3273 } 3274 if (tp != NULL) { 3275 strlcpy(kif->kf_path, tty_devname(tp), 3276 sizeof(kif->kf_path)); 3277 } 3278 if (shmfd != NULL) 3279 shm_path(shmfd, kif->kf_path, sizeof(kif->kf_path)); 3280 if (ks != NULL && ksem_info != NULL) 3281 ksem_info(ks, kif->kf_path, sizeof(kif->kf_path), NULL); 3282 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 3283 if (error) 3284 break; 3285 } 3286 FILEDESC_SUNLOCK(fdp); 3287 fddrop(fdp); 3288 free(kif, M_TEMP); 3289 return (0); 3290} 3291 3292static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, 3293 CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc, 3294 "Process ofiledesc entries"); 3295#endif /* COMPAT_FREEBSD7 */ 3296 3297#ifdef KINFO_FILE_SIZE 3298CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); 3299#endif 3300 3301struct export_fd_buf { 3302 struct filedesc *fdp; 3303 struct sbuf *sb; 3304 ssize_t remainder; 3305 struct kinfo_file kif; 3306}; 3307 3308static int 3309export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt, 3310 int64_t offset, cap_rights_t *rightsp, struct export_fd_buf *efbuf) 3311{ 3312 struct { 3313 int fflag; 3314 int kf_fflag; 3315 } fflags_table[] = { 3316 { FAPPEND, KF_FLAG_APPEND }, 3317 { FASYNC, KF_FLAG_ASYNC }, 3318 { FFSYNC, KF_FLAG_FSYNC }, 3319 { FHASLOCK, KF_FLAG_HASLOCK }, 3320 { FNONBLOCK, KF_FLAG_NONBLOCK }, 3321 { FREAD, KF_FLAG_READ }, 3322 { FWRITE, KF_FLAG_WRITE }, 3323 { O_CREAT, KF_FLAG_CREAT }, 3324 { O_DIRECT, KF_FLAG_DIRECT }, 3325 { O_EXCL, KF_FLAG_EXCL }, 3326 { O_EXEC, KF_FLAG_EXEC }, 3327 { O_EXLOCK, KF_FLAG_EXLOCK }, 3328 { O_NOFOLLOW, KF_FLAG_NOFOLLOW }, 3329 { O_SHLOCK, KF_FLAG_SHLOCK }, 3330 { O_TRUNC, KF_FLAG_TRUNC } 3331 }; 3332#define NFFLAGS (sizeof(fflags_table) / sizeof(*fflags_table)) 3333 struct kinfo_file *kif; 3334 struct vnode *vp; 3335 int error, locked; 3336 unsigned int i; 3337 3338 if (efbuf->remainder == 0) 3339 return (0); 3340 kif = &efbuf->kif; 3341 bzero(kif, sizeof(*kif)); 3342 locked = efbuf->fdp != NULL; 3343 switch (type) { 3344 case KF_TYPE_FIFO: 3345 case KF_TYPE_VNODE: 3346 if (locked) { 3347 FILEDESC_SUNLOCK(efbuf->fdp); 3348 locked = 0; 3349 } 3350 vp = (struct vnode *)data; 3351 error = fill_vnode_info(vp, kif); 3352 vrele(vp); 3353 break; 3354 case KF_TYPE_SOCKET: 3355 error = fill_socket_info((struct socket *)data, kif); 3356 break; 3357 case KF_TYPE_PIPE: 3358 error = fill_pipe_info((struct pipe *)data, kif); 3359 break; 3360 case KF_TYPE_PTS: 3361 error = fill_pts_info((struct tty *)data, kif); 3362 break; 3363 case KF_TYPE_PROCDESC: 3364 error = fill_procdesc_info((struct procdesc *)data, kif); 3365 break; 3366 case KF_TYPE_SEM: 3367 error = fill_sem_info((struct file *)data, kif); 3368 break; 3369 case KF_TYPE_SHM: 3370 error = fill_shm_info((struct file *)data, kif); 3371 break; 3372 default: 3373 error = 0; 3374 } 3375 if (error == 0) 3376 kif->kf_status |= KF_ATTR_VALID; 3377 3378 /* 3379 * Translate file access flags. 3380 */ 3381 for (i = 0; i < NFFLAGS; i++) 3382 if (fflags & fflags_table[i].fflag) 3383 kif->kf_flags |= fflags_table[i].kf_fflag; 3384 if (rightsp != NULL) 3385 kif->kf_cap_rights = *rightsp; 3386 else 3387 cap_rights_init(&kif->kf_cap_rights); 3388 kif->kf_fd = fd; 3389 kif->kf_type = type; 3390 kif->kf_ref_count = refcnt; 3391 kif->kf_offset = offset; 3392 /* Pack record size down */ 3393 kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + 3394 strlen(kif->kf_path) + 1; 3395 kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t)); 3396 if (efbuf->remainder != -1) { 3397 if (efbuf->remainder < kif->kf_structsize) { 3398 /* Terminate export. */ 3399 efbuf->remainder = 0; 3400 if (efbuf->fdp != NULL && !locked) 3401 FILEDESC_SLOCK(efbuf->fdp); 3402 return (0); 3403 } 3404 efbuf->remainder -= kif->kf_structsize; 3405 } 3406 if (locked) 3407 FILEDESC_SUNLOCK(efbuf->fdp); 3408 error = sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) == 0 ? 0 : ENOMEM; 3409 if (efbuf->fdp != NULL) 3410 FILEDESC_SLOCK(efbuf->fdp); 3411 return (error); 3412} 3413 3414/* 3415 * Store a process file descriptor information to sbuf. 3416 * 3417 * Takes a locked proc as argument, and returns with the proc unlocked. 3418 */ 3419int 3420kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) 3421{ 3422 struct file *fp; 3423 struct filedesc *fdp; 3424 struct export_fd_buf *efbuf; 3425 struct vnode *cttyvp, *textvp, *tracevp; 3426 int64_t offset; 3427 void *data; 3428 int error, i; 3429 int type, refcnt, fflags; 3430 cap_rights_t rights; 3431 3432 PROC_LOCK_ASSERT(p, MA_OWNED); 3433 3434 /* ktrace vnode */ 3435 tracevp = p->p_tracevp; 3436 if (tracevp != NULL) 3437 vref(tracevp); 3438 /* text vnode */ 3439 textvp = p->p_textvp; 3440 if (textvp != NULL) 3441 vref(textvp); 3442 /* Controlling tty. */ 3443 cttyvp = NULL; 3444 if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) { 3445 cttyvp = p->p_pgrp->pg_session->s_ttyvp; 3446 if (cttyvp != NULL) 3447 vref(cttyvp); 3448 } 3449 fdp = fdhold(p); 3450 PROC_UNLOCK(p); 3451 efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); 3452 efbuf->fdp = NULL; 3453 efbuf->sb = sb; 3454 efbuf->remainder = maxlen; 3455 if (tracevp != NULL) 3456 export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, 3457 FREAD | FWRITE, -1, -1, NULL, efbuf); 3458 if (textvp != NULL) 3459 export_fd_to_sb(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT, 3460 FREAD, -1, -1, NULL, efbuf); 3461 if (cttyvp != NULL) 3462 export_fd_to_sb(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY, 3463 FREAD | FWRITE, -1, -1, NULL, efbuf); 3464 error = 0; 3465 if (fdp == NULL) 3466 goto fail; 3467 efbuf->fdp = fdp; 3468 FILEDESC_SLOCK(fdp); 3469 /* working directory */ 3470 if (fdp->fd_cdir != NULL) { 3471 vref(fdp->fd_cdir); 3472 data = fdp->fd_cdir; 3473 export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD, 3474 FREAD, -1, -1, NULL, efbuf); 3475 } 3476 /* root directory */ 3477 if (fdp->fd_rdir != NULL) { 3478 vref(fdp->fd_rdir); 3479 data = fdp->fd_rdir; 3480 export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT, 3481 FREAD, -1, -1, NULL, efbuf); 3482 } 3483 /* jail directory */ 3484 if (fdp->fd_jdir != NULL) { 3485 vref(fdp->fd_jdir); 3486 data = fdp->fd_jdir; 3487 export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL, 3488 FREAD, -1, -1, NULL, efbuf); 3489 } 3490 for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { 3491 if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) 3492 continue; 3493 data = NULL; 3494#ifdef CAPABILITIES 3495 rights = *cap_rights(fdp, i); 3496#else /* !CAPABILITIES */ 3497 cap_rights_init(&rights); 3498#endif 3499 switch (fp->f_type) { 3500 case DTYPE_VNODE: 3501 type = KF_TYPE_VNODE; 3502 vref(fp->f_vnode); 3503 data = fp->f_vnode; 3504 break; 3505 3506 case DTYPE_SOCKET: 3507 type = KF_TYPE_SOCKET; 3508 data = fp->f_data; 3509 break; 3510 3511 case DTYPE_PIPE: 3512 type = KF_TYPE_PIPE; 3513 data = fp->f_data; 3514 break; 3515 3516 case DTYPE_FIFO: 3517 type = KF_TYPE_FIFO; 3518 vref(fp->f_vnode); 3519 data = fp->f_vnode; 3520 break; 3521 3522 case DTYPE_KQUEUE: 3523 type = KF_TYPE_KQUEUE; 3524 break; 3525 3526 case DTYPE_CRYPTO: 3527 type = KF_TYPE_CRYPTO; 3528 break; 3529 3530 case DTYPE_MQUEUE: 3531 type = KF_TYPE_MQUEUE; 3532 break; 3533 3534 case DTYPE_SHM: 3535 type = KF_TYPE_SHM; 3536 data = fp; 3537 break; 3538 3539 case DTYPE_SEM: 3540 type = KF_TYPE_SEM; 3541 data = fp; 3542 break; 3543 3544 case DTYPE_PTS: 3545 type = KF_TYPE_PTS; 3546 data = fp->f_data; 3547 break; 3548 3549#ifdef PROCDESC 3550 case DTYPE_PROCDESC: 3551 type = KF_TYPE_PROCDESC; 3552 data = fp->f_data; 3553 break; 3554#endif 3555 3556 default: 3557 type = KF_TYPE_UNKNOWN; 3558 break; 3559 } 3560 refcnt = fp->f_count; 3561 fflags = fp->f_flag; 3562 offset = foffset_get(fp); 3563 3564 /* 3565 * Create sysctl entry. 3566 * It is OK to drop the filedesc lock here as we will 3567 * re-validate and re-evaluate its properties when 3568 * the loop continues. 3569 */ 3570 error = export_fd_to_sb(data, type, i, fflags, refcnt, 3571 offset, &rights, efbuf); 3572 if (error != 0) 3573 break; 3574 } 3575 FILEDESC_SUNLOCK(fdp); 3576 fddrop(fdp); 3577fail: 3578 free(efbuf, M_TEMP); 3579 return (error); 3580} 3581 3582#define FILEDESC_SBUF_SIZE (sizeof(struct kinfo_file) * 5) 3583 3584/* 3585 * Get per-process file descriptors for use by procstat(1), et al. 3586 */ 3587static int 3588sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) 3589{ 3590 struct sbuf sb; 3591 struct proc *p; 3592 ssize_t maxlen; 3593 int error, error2, *name; 3594 3595 name = (int *)arg1; 3596 3597 sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req); 3598 error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); 3599 if (error != 0) { 3600 sbuf_delete(&sb); 3601 return (error); 3602 } 3603 maxlen = req->oldptr != NULL ? req->oldlen : -1; 3604 error = kern_proc_filedesc_out(p, &sb, maxlen); 3605 error2 = sbuf_finish(&sb); 3606 sbuf_delete(&sb); 3607 return (error != 0 ? error : error2); 3608} 3609 3610int 3611vntype_to_kinfo(int vtype) 3612{ 3613 struct { 3614 int vtype; 3615 int kf_vtype; 3616 } vtypes_table[] = { 3617 { VBAD, KF_VTYPE_VBAD }, 3618 { VBLK, KF_VTYPE_VBLK }, 3619 { VCHR, KF_VTYPE_VCHR }, 3620 { VDIR, KF_VTYPE_VDIR }, 3621 { VFIFO, KF_VTYPE_VFIFO }, 3622 { VLNK, KF_VTYPE_VLNK }, 3623 { VNON, KF_VTYPE_VNON }, 3624 { VREG, KF_VTYPE_VREG }, 3625 { VSOCK, KF_VTYPE_VSOCK } 3626 }; 3627#define NVTYPES (sizeof(vtypes_table) / sizeof(*vtypes_table)) 3628 unsigned int i; 3629 3630 /* 3631 * Perform vtype translation. 3632 */ 3633 for (i = 0; i < NVTYPES; i++) 3634 if (vtypes_table[i].vtype == vtype) 3635 break; 3636 if (i < NVTYPES) 3637 return (vtypes_table[i].kf_vtype); 3638 3639 return (KF_VTYPE_UNKNOWN); 3640} 3641 3642static int 3643fill_vnode_info(struct vnode *vp, struct kinfo_file *kif) 3644{ 3645 struct vattr va; 3646 char *fullpath, *freepath; 3647 int error; 3648 3649 if (vp == NULL) 3650 return (1); 3651 kif->kf_vnode_type = vntype_to_kinfo(vp->v_type); 3652 freepath = NULL; 3653 fullpath = "-"; 3654 error = vn_fullpath(curthread, vp, &fullpath, &freepath); 3655 if (error == 0) { 3656 strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path)); 3657 } 3658 if (freepath != NULL) 3659 free(freepath, M_TEMP); 3660 3661 /* 3662 * Retrieve vnode attributes. 3663 */ 3664 va.va_fsid = VNOVAL; 3665 va.va_rdev = NODEV; 3666 vn_lock(vp, LK_SHARED | LK_RETRY); 3667 error = VOP_GETATTR(vp, &va, curthread->td_ucred); 3668 VOP_UNLOCK(vp, 0); 3669 if (error != 0) 3670 return (error); 3671 if (va.va_fsid != VNOVAL) 3672 kif->kf_un.kf_file.kf_file_fsid = va.va_fsid; 3673 else 3674 kif->kf_un.kf_file.kf_file_fsid = 3675 vp->v_mount->mnt_stat.f_fsid.val[0]; 3676 kif->kf_un.kf_file.kf_file_fileid = va.va_fileid; 3677 kif->kf_un.kf_file.kf_file_mode = MAKEIMODE(va.va_type, va.va_mode); 3678 kif->kf_un.kf_file.kf_file_size = va.va_size; 3679 kif->kf_un.kf_file.kf_file_rdev = va.va_rdev; 3680 return (0); 3681} 3682 3683static int 3684fill_socket_info(struct socket *so, struct kinfo_file *kif) 3685{ 3686 struct sockaddr *sa; 3687 struct inpcb *inpcb; 3688 struct unpcb *unpcb; 3689 int error; 3690 3691 if (so == NULL) 3692 return (1); 3693 kif->kf_sock_domain = so->so_proto->pr_domain->dom_family; 3694 kif->kf_sock_type = so->so_type; 3695 kif->kf_sock_protocol = so->so_proto->pr_protocol; 3696 kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb; 3697 switch(kif->kf_sock_domain) { 3698 case AF_INET: 3699 case AF_INET6: 3700 if (kif->kf_sock_protocol == IPPROTO_TCP) { 3701 if (so->so_pcb != NULL) { 3702 inpcb = (struct inpcb *)(so->so_pcb); 3703 kif->kf_un.kf_sock.kf_sock_inpcb = 3704 (uintptr_t)inpcb->inp_ppcb; 3705 } 3706 } 3707 break; 3708 case AF_UNIX: 3709 if (so->so_pcb != NULL) { 3710 unpcb = (struct unpcb *)(so->so_pcb); 3711 if (unpcb->unp_conn) { 3712 kif->kf_un.kf_sock.kf_sock_unpconn = 3713 (uintptr_t)unpcb->unp_conn; 3714 kif->kf_un.kf_sock.kf_sock_rcv_sb_state = 3715 so->so_rcv.sb_state; 3716 kif->kf_un.kf_sock.kf_sock_snd_sb_state = 3717 so->so_snd.sb_state; 3718 } 3719 } 3720 break; 3721 } 3722 error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); 3723 if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) { 3724 bcopy(sa, &kif->kf_sa_local, sa->sa_len); 3725 free(sa, M_SONAME); 3726 } 3727 error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa); 3728 if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { 3729 bcopy(sa, &kif->kf_sa_peer, sa->sa_len); 3730 free(sa, M_SONAME); 3731 } 3732 strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name, 3733 sizeof(kif->kf_path)); 3734 return (0); 3735} 3736 3737static int 3738fill_pts_info(struct tty *tp, struct kinfo_file *kif) 3739{ 3740 3741 if (tp == NULL) 3742 return (1); 3743 kif->kf_un.kf_pts.kf_pts_dev = tty_udev(tp); 3744 strlcpy(kif->kf_path, tty_devname(tp), sizeof(kif->kf_path)); 3745 return (0); 3746} 3747 3748static int 3749fill_pipe_info(struct pipe *pi, struct kinfo_file *kif) 3750{ 3751 3752 if (pi == NULL) 3753 return (1); 3754 kif->kf_un.kf_pipe.kf_pipe_addr = (uintptr_t)pi; 3755 kif->kf_un.kf_pipe.kf_pipe_peer = (uintptr_t)pi->pipe_peer; 3756 kif->kf_un.kf_pipe.kf_pipe_buffer_cnt = pi->pipe_buffer.cnt; 3757 return (0); 3758} 3759 3760static int 3761fill_procdesc_info(struct procdesc *pdp, struct kinfo_file *kif) 3762{ 3763 3764 if (pdp == NULL) 3765 return (1); 3766 kif->kf_un.kf_proc.kf_pid = pdp->pd_pid; 3767 return (0); 3768} 3769 3770static int 3771fill_sem_info(struct file *fp, struct kinfo_file *kif) 3772{ 3773 struct thread *td; 3774 struct stat sb; 3775 3776 td = curthread; 3777 if (fp->f_data == NULL) 3778 return (1); 3779 if (fo_stat(fp, &sb, td->td_ucred, td) != 0) 3780 return (1); 3781 if (ksem_info == NULL) 3782 return (1); 3783 ksem_info(fp->f_data, kif->kf_path, sizeof(kif->kf_path), 3784 &kif->kf_un.kf_sem.kf_sem_value); 3785 kif->kf_un.kf_sem.kf_sem_mode = sb.st_mode; 3786 return (0); 3787} 3788 3789static int 3790fill_shm_info(struct file *fp, struct kinfo_file *kif) 3791{ 3792 struct thread *td; 3793 struct stat sb; 3794 3795 td = curthread; 3796 if (fp->f_data == NULL) 3797 return (1); 3798 if (fo_stat(fp, &sb, td->td_ucred, td) != 0) 3799 return (1); 3800 shm_path(fp->f_data, kif->kf_path, sizeof(kif->kf_path)); 3801 kif->kf_un.kf_file.kf_file_mode = sb.st_mode; 3802 kif->kf_un.kf_file.kf_file_size = sb.st_size; 3803 return (0); 3804} 3805 3806static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, 3807 CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc, 3808 "Process filedesc entries"); 3809 3810#ifdef DDB 3811/* 3812 * For the purposes of debugging, generate a human-readable string for the 3813 * file type. 3814 */ 3815static const char * 3816file_type_to_name(short type) 3817{ 3818 3819 switch (type) { 3820 case 0: 3821 return ("zero"); 3822 case DTYPE_VNODE: 3823 return ("vnod"); 3824 case DTYPE_SOCKET: 3825 return ("sock"); 3826 case DTYPE_PIPE: 3827 return ("pipe"); 3828 case DTYPE_FIFO: 3829 return ("fifo"); 3830 case DTYPE_KQUEUE: 3831 return ("kque"); 3832 case DTYPE_CRYPTO: 3833 return ("crpt"); 3834 case DTYPE_MQUEUE: 3835 return ("mque"); 3836 case DTYPE_SHM: 3837 return ("shm"); 3838 case DTYPE_SEM: 3839 return ("ksem"); 3840 default: 3841 return ("unkn"); 3842 } 3843} 3844 3845/* 3846 * For the purposes of debugging, identify a process (if any, perhaps one of 3847 * many) that references the passed file in its file descriptor array. Return 3848 * NULL if none. 3849 */ 3850static struct proc * 3851file_to_first_proc(struct file *fp) 3852{ 3853 struct filedesc *fdp; 3854 struct proc *p; 3855 int n; 3856 3857 FOREACH_PROC_IN_SYSTEM(p) { 3858 if (p->p_state == PRS_NEW) 3859 continue; 3860 fdp = p->p_fd; 3861 if (fdp == NULL) 3862 continue; 3863 for (n = 0; n <= fdp->fd_lastfile; n++) { 3864 if (fp == fdp->fd_ofiles[n].fde_file) 3865 return (p); 3866 } 3867 } 3868 return (NULL); 3869} 3870 3871static void 3872db_print_file(struct file *fp, int header) 3873{ 3874 struct proc *p; 3875 3876 if (header) 3877 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", 3878 "File", "Type", "Data", "Flag", "GCFl", "Count", 3879 "MCount", "Vnode", "FPID", "FCmd"); 3880 p = file_to_first_proc(fp); 3881 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 3882 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 3883 0, fp->f_count, 0, fp->f_vnode, 3884 p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-"); 3885} 3886 3887DB_SHOW_COMMAND(file, db_show_file) 3888{ 3889 struct file *fp; 3890 3891 if (!have_addr) { 3892 db_printf("usage: show file <addr>\n"); 3893 return; 3894 } 3895 fp = (struct file *)addr; 3896 db_print_file(fp, 1); 3897} 3898 3899DB_SHOW_COMMAND(files, db_show_files) 3900{ 3901 struct filedesc *fdp; 3902 struct file *fp; 3903 struct proc *p; 3904 int header; 3905 int n; 3906 3907 header = 1; 3908 FOREACH_PROC_IN_SYSTEM(p) { 3909 if (p->p_state == PRS_NEW) 3910 continue; 3911 if ((fdp = p->p_fd) == NULL) 3912 continue; 3913 for (n = 0; n <= fdp->fd_lastfile; ++n) { 3914 if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) 3915 continue; 3916 db_print_file(fp, header); 3917 header = 0; 3918 } 3919 } 3920} 3921#endif 3922 3923SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 3924 &maxfilesperproc, 0, "Maximum files allowed open per process"); 3925 3926SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 3927 &maxfiles, 0, "Maximum number of files"); 3928 3929SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 3930 __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files"); 3931 3932/* ARGSUSED*/ 3933static void 3934filelistinit(void *dummy) 3935{ 3936 3937 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 3938 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 3939 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 3940 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 3941} 3942SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); 3943 3944/*-------------------------------------------------------------------*/ 3945 3946static int 3947badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, 3948 int flags, struct thread *td) 3949{ 3950 3951 return (EBADF); 3952} 3953 3954static int 3955badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, 3956 struct thread *td) 3957{ 3958 3959 return (EINVAL); 3960} 3961 3962static int 3963badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, 3964 struct thread *td) 3965{ 3966 3967 return (EBADF); 3968} 3969 3970static int 3971badfo_poll(struct file *fp, int events, struct ucred *active_cred, 3972 struct thread *td) 3973{ 3974 3975 return (0); 3976} 3977 3978static int 3979badfo_kqfilter(struct file *fp, struct knote *kn) 3980{ 3981 3982 return (EBADF); 3983} 3984 3985static int 3986badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 3987 struct thread *td) 3988{ 3989 3990 return (EBADF); 3991} 3992 3993static int 3994badfo_close(struct file *fp, struct thread *td) 3995{ 3996 3997 return (EBADF); 3998} 3999 4000static int 4001badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, 4002 struct thread *td) 4003{ 4004 4005 return (EBADF); 4006} 4007 4008static int 4009badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, 4010 struct thread *td) 4011{ 4012 4013 return (EBADF); 4014} 4015 4016static int 4017badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, 4018 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, 4019 int kflags, struct thread *td) 4020{ 4021 4022 return (EBADF); 4023} 4024 4025struct fileops badfileops = { 4026 .fo_read = badfo_readwrite, 4027 .fo_write = badfo_readwrite, 4028 .fo_truncate = badfo_truncate, 4029 .fo_ioctl = badfo_ioctl, 4030 .fo_poll = badfo_poll, 4031 .fo_kqfilter = badfo_kqfilter, 4032 .fo_stat = badfo_stat, 4033 .fo_close = badfo_close, 4034 .fo_chmod = badfo_chmod, 4035 .fo_chown = badfo_chown, 4036 .fo_sendfile = badfo_sendfile, 4037}; 4038 4039int 4040invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, 4041 struct thread *td) 4042{ 4043 4044 return (EINVAL); 4045} 4046 4047int 4048invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, 4049 struct thread *td) 4050{ 4051 4052 return (EINVAL); 4053} 4054 4055int 4056invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, 4057 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, 4058 int kflags, struct thread *td) 4059{ 4060 4061 return (EINVAL); 4062} 4063 4064/*-------------------------------------------------------------------*/ 4065 4066/* 4067 * File Descriptor pseudo-device driver (/dev/fd/). 4068 * 4069 * Opening minor device N dup()s the file (if any) connected to file 4070 * descriptor N belonging to the calling process. Note that this driver 4071 * consists of only the ``open()'' routine, because all subsequent 4072 * references to this file will be direct to the other driver. 4073 * 4074 * XXX: we could give this one a cloning event handler if necessary. 4075 */ 4076 4077/* ARGSUSED */ 4078static int 4079fdopen(struct cdev *dev, int mode, int type, struct thread *td) 4080{ 4081 4082 /* 4083 * XXX Kludge: set curthread->td_dupfd to contain the value of the 4084 * the file descriptor being sought for duplication. The error 4085 * return ensures that the vnode for this device will be released 4086 * by vn_open. Open will detect this special error and take the 4087 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 4088 * will simply report the error. 4089 */ 4090 td->td_dupfd = dev2unit(dev); 4091 return (ENODEV); 4092} 4093 4094static struct cdevsw fildesc_cdevsw = { 4095 .d_version = D_VERSION, 4096 .d_open = fdopen, 4097 .d_name = "FD", 4098}; 4099 4100static void 4101fildesc_drvinit(void *unused) 4102{ 4103 struct cdev *dev; 4104 4105 dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL, 4106 UID_ROOT, GID_WHEEL, 0666, "fd/0"); 4107 make_dev_alias(dev, "stdin"); 4108 dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL, 4109 UID_ROOT, GID_WHEEL, 0666, "fd/1"); 4110 make_dev_alias(dev, "stdout"); 4111 dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL, 4112 UID_ROOT, GID_WHEEL, 0666, "fd/2"); 4113 make_dev_alias(dev, "stderr"); 4114} 4115 4116SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL); 4117