1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#ifndef _SYS_FILEDESC_H_
33#define	_SYS_FILEDESC_H_
34
35#include <sys/types.h>
36#include <sys/caprights.h>
37#include <sys/queue.h>
38#include <sys/event.h>
39#include <sys/lock.h>
40#include <sys/mutex.h>
41#include <sys/priority.h>
42#include <sys/seqc.h>
43#include <sys/sx.h>
44#include <sys/_smr.h>
45#include <sys/smr_types.h>
46
47#include <machine/_limits.h>
48
/*
 * Capability state attached to a single descriptor: a rights mask plus the
 * ioctl and fcntl allow-lists.
 */
struct filecaps {
	cap_rights_t	 fc_rights;	/* per-descriptor capability rights */
	u_long		*fc_ioctls;	/* per-descriptor allowed ioctls */
	int16_t		 fc_nioctls;	/* fc_ioctls array size */
					/* (filecaps_init() sets this to -1) */
	uint32_t	 fc_fcntls;	/* per-descriptor allowed fcntls */
};
55
/*
 * One slot of the open files table: the file pointer together with its
 * per-descriptor capabilities, flags and a sequence counter.
 */
struct filedescent {
	struct file	*fde_file;	/* file structure for open file */
	struct filecaps	 fde_caps;	/* per-descriptor rights */
	uint8_t		 fde_flags;	/* per-process open file flags */
	seqc_t		 fde_seqc;	/* keep file and caps in sync */
};
/* Shorthand for reaching the embedded filecaps members through an entry. */
#define	fde_rights	fde_caps.fc_rights
#define	fde_fcntls	fde_caps.fc_fcntls
#define	fde_ioctls	fde_caps.fc_ioctls
#define	fde_nioctls	fde_caps.fc_nioctls
66
67#ifdef _KERNEL
68static inline void
69fde_copy(struct filedescent *from, struct filedescent *to)
70{
71
72	to->fde_file = from->fde_file;
73	to->fde_caps = from->fde_caps;
74	to->fde_flags = from->fde_flags;
75}
76#endif
77
/*
 * Table of descriptor entries.  fdt_ofiles is declared as a trailing
 * zero-length array, so the entries occupy storage that follows the
 * structure itself.
 */
struct fdescenttbl {
	int	fdt_nfiles;		/* number of open files allocated */
	struct	filedescent fdt_ofiles[0];	/* open files */
};
/* Address of the fde_seqc counter belonging to slot 'fd' of table 'fdt'. */
#define	fd_seqc(fdt, fd)	(&(fdt)->fdt_ofiles[(fd)].fde_seqc)

/* Word type of the fd_map bitmap in struct filedesc. */
#define NDSLOTTYPE	u_long
85
86/*
87 * This struct is copy-on-write and allocated from an SMR zone.
88 * All fields are constant after initialization apart from the reference count.
 * The ABI root directory is initialized to the root directory and is changed
 * when a process transitions to or from a non-native ABI.
91 *
92 * Check pwd_* routines for usage.
93 */
struct pwd {
	u_int		pwd_refcount;	/* reference count (only mutable field) */
	struct	vnode	*pwd_cdir;	/* current directory */
	struct	vnode	*pwd_rdir;	/* root directory */
	struct	vnode	*pwd_jdir;	/* jail root directory */
	struct	vnode	*pwd_adir;	/* abi root directory */
};
/* SMR pointer wrapper around a struct pwd *, used for pwddesc's pd_pwd. */
typedef SMR_POINTER(struct pwd *) smrpwd_t;
102
/*
 * Path-related process state, reached through a process's p_pd pointer:
 * the current struct pwd and the file creation mask.
 */
struct pwddesc {
	struct mtx	pd_lock;	/* protects members of this struct */
	smrpwd_t	pd_pwd;		/* directories */
	u_int		pd_refcount;	/* reference count */
	u_short		pd_cmask;	/* mask for file creation */
};
109
110/*
111 * This structure is used for the management of descriptors.  It may be
112 * shared by multiple processes.
113 */
struct filedesc {
	struct	fdescenttbl *fd_files;	/* open files table */
	NDSLOTTYPE *fd_map;		/* bitmap of free fds */
	int	fd_freefile;		/* approx. next free file */
	int	fd_refcnt;		/* thread reference count */
	int	fd_holdcnt;		/* hold count on structure + lock */
	struct	sx fd_sx;		/* protects members of this struct */
	struct	kqlist fd_kqlist;	/* list of kqueues on this filedesc */
	int	fd_holdleaderscount;	/* block fdfree() for shared close() */
	int	fd_holdleaderswakeup;	/* fdfree() needs wakeup */
};
125
126/*
 * Structure to keep track of (process leader, struct filedesc) tuples.
128 * Each process has a pointer to such a structure when detailed tracking
129 * is needed, e.g., when rfork(RFPROC | RFMEM) causes a file descriptor
130 * table to be shared by processes having different "p_leader" pointers
131 * and thus distinct POSIX style locks.
132 *
 * fdl_refcount and fdl_holdcount are protected by the struct filedesc lock.
134 */
struct filedesc_to_leader {
	int		fdl_refcount;	/* references from struct proc */
	int		fdl_holdcount;	/* temporary hold during closef */
	int		fdl_wakeup;	/* fdfree() waits on closef() */
	struct proc	*fdl_leader;	/* owner of POSIX locks */
	/* Circular list: */
	struct filedesc_to_leader *fdl_prev;
	struct filedesc_to_leader *fdl_next;
};
/*
 * Convenience accessors for struct filedesc that reach through fd_files
 * into the current descriptor table.
 */
#define	fd_nfiles	fd_files->fdt_nfiles
#define	fd_ofiles	fd_files->fdt_ofiles
146
/*
 * Per-process open flags.
 * NOTE(review): presumably these are the values stored in the fde_flags
 * member of struct filedescent -- confirm against the users.
 */
#define	UF_EXCLOSE	0x01		/* auto-close on exec */
151
152#ifdef _KERNEL
153
/*
 * Locking for a path descriptor (struct pwddesc).  pd_lock is a default
 * (MTX_DEF) mutex, so only an exclusive lock/unlock pair is provided.
 */
#define	PWDDESC_LOCK(pdp)	(&(pdp)->pd_lock)
#define	PWDDESC_LOCK_INIT(pdp) \
    mtx_init(PWDDESC_LOCK(pdp), "pwddesc", NULL, MTX_DEF)
#define	PWDDESC_LOCK_DESTROY(pdp)	mtx_destroy(PWDDESC_LOCK(pdp))
#define	PWDDESC_XLOCK(pdp)	mtx_lock(PWDDESC_LOCK(pdp))
#define	PWDDESC_XUNLOCK(pdp)	mtx_unlock(PWDDESC_LOCK(pdp))
/* Assertion helpers; 'what' is passed straight through to mtx_assert(). */
#define	PWDDESC_LOCK_ASSERT(pdp, what) \
    mtx_assert(PWDDESC_LOCK(pdp), (what))
#define	PWDDESC_ASSERT_XLOCKED(pdp) \
    PWDDESC_LOCK_ASSERT((pdp), MA_OWNED)
#define	PWDDESC_ASSERT_UNLOCKED(pdp) \
    PWDDESC_LOCK_ASSERT((pdp), MA_NOTOWNED)
167
/*
 * Load the struct pwd pointer stored in 'pdp' and yield it as the value of
 * the expression.  The pwddesc lock must be held by the caller; ownership
 * is asserted as part of the serialized load.  'pdp' is evaluated once.
 */
#define	PWDDESC_XLOCKED_LOAD_PWD(pdp)	({					\
	struct pwddesc *_pdp = (pdp);						\
	struct pwd *_pwd;							\
	_pwd = smr_serialized_load(&(_pdp)->pd_pwd,				\
	    (PWDDESC_ASSERT_XLOCKED(_pdp), true));				\
	_pwd;									\
})
175
/*
 * Lock a file descriptor table.  fd_sx is an sx lock, so both exclusive
 * (X) and shared (S) acquisition are available.
 */
#define	FILEDESC_LOCK_INIT(fdp)	sx_init(&(fdp)->fd_sx, "filedesc structure")
#define	FILEDESC_LOCK_DESTROY(fdp)	sx_destroy(&(fdp)->fd_sx)
#define	FILEDESC_LOCK(fdp)	(&(fdp)->fd_sx)
#define	FILEDESC_XLOCK(fdp)	sx_xlock(&(fdp)->fd_sx)
#define	FILEDESC_XUNLOCK(fdp)	sx_xunlock(&(fdp)->fd_sx)
#define	FILEDESC_SLOCK(fdp)	sx_slock(&(fdp)->fd_sx)
#define	FILEDESC_SUNLOCK(fdp)	sx_sunlock(&(fdp)->fd_sx)

/*
 * Assertions: held in either mode, held exclusively, or not held at all.
 * The two "held" variants additionally require the lock not be recursed.
 */
#define	FILEDESC_LOCK_ASSERT(fdp)	sx_assert(&(fdp)->fd_sx, SX_LOCKED | \
					    SX_NOTRECURSED)
#define	FILEDESC_XLOCK_ASSERT(fdp)	sx_assert(&(fdp)->fd_sx, SX_XLOCKED | \
					    SX_NOTRECURSED)
#define	FILEDESC_UNLOCK_ASSERT(fdp)	sx_assert(&(fdp)->fd_sx, SX_UNLOCKED)
190
/*
 * Evaluates to true when the current process has exactly one thread and
 * the table's fd_refcnt is 1.  'fdp' must be curproc's own table (checked
 * with MPASS) and is evaluated once.
 */
#define	FILEDESC_IS_ONLY_USER(fdp)	({					\
	struct filedesc *_fdp = (fdp);						\
	MPASS(curproc->p_fd == _fdp);						\
	(curproc->p_numthreads == 1 && refcount_load(&_fdp->fd_refcnt) == 1);	\
})
196
197#else
198
/*
 * Accessor for libkvm et al.
 *
 * Only defined for non-_KERNEL builds.  Yields the struct pwd pointer held
 * in 'pdp' via smr_kvm_load(); 'pdp' is evaluated once.
 */
#define	PWDDESC_KVM_LOAD_PWD(pdp)	({					\
	struct pwddesc *_pdp = (pdp);						\
	struct pwd *_pwd;							\
	_pwd = smr_kvm_load(&(_pdp)->pd_pwd);					\
	_pwd;									\
})
208
209#endif
210
211#ifdef _KERNEL
212
/*
 * Operation types for kern_dup().
 * NOTE(review): FDDUP_LASTMODE appears to be an end-of-list sentinel rather
 * than a usable mode -- confirm against kern_dup().
 */
enum {
	FDDUP_NORMAL,		/* dup() behavior. */
	FDDUP_FCNTL,		/* fcntl()-style errors. */
	FDDUP_FIXED,		/* Force fixed allocation. */
	FDDUP_LASTMODE,
};

/* Flags for kern_dup(). */
#define	FDDUP_FLAG_CLOEXEC	0x1	/* Atomically set UF_EXCLOSE. */
223
/*
 * For backward compatibility: falloc() is falloc_caps() called with a NULL
 * filecaps pointer.
 */
#define	falloc(td, resultfp, resultfd, flags) \
	falloc_caps(td, resultfp, resultfd, flags, NULL)
227
228struct mount;
229struct thread;
230
231static __inline void
232filecaps_init(struct filecaps *fcaps)
233{
234
235        bzero(fcaps, sizeof(*fcaps));
236        fcaps->fc_nioctls = -1;
237}
/* Remaining filecaps helpers; implemented outside this header. */
bool	filecaps_copy(const struct filecaps *src, struct filecaps *dst,
	    bool locked);
void	filecaps_move(struct filecaps *src, struct filecaps *dst);
void	filecaps_free(struct filecaps *fcaps);
242
/*
 * File and descriptor table routines; implemented outside this header.
 */
int	closef(struct file *fp, struct thread *td);
void	closef_nothread(struct file *fp);
int	descrip_check_write_mp(struct filedesc *fdp, struct mount *mp);
int	dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
	    int openerror, int *indxp);
int	falloc_caps(struct thread *td, struct file **resultfp, int *resultfd,
	    int flags, struct filecaps *fcaps);
void	falloc_abort(struct thread *td, struct file *fp);
int	_falloc_noinstall(struct thread *td, struct file **resultfp, u_int n);
/* falloc_noinstall() is _falloc_noinstall() with 'n' fixed at 1. */
#define	falloc_noinstall(td, resultfp) _falloc_noinstall(td, resultfp, 1)
void	_finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
	    struct filecaps *fcaps);
int	finstall(struct thread *td, struct file *fp, int *resultfd, int flags,
	    struct filecaps *fcaps);
int	finstall_refed(struct thread *td, struct file *fp, int *resultfd, int flags,
	    struct filecaps *fcaps);
int	fdalloc(struct thread *td, int minfd, int *result);
int	fdallocn(struct thread *td, int minfd, int *fds, int n);
int	fdcheckstd(struct thread *td);
void	fdclose(struct thread *td, struct file *fp, int idx);
void	fdcloseexec(struct thread *td);
void	fdsetugidsafety(struct thread *td);
struct	filedesc *fdcopy(struct filedesc *fdp);
void	fdunshare(struct thread *td);
void	fdescfree(struct thread *td);
int	fdlastfile(struct filedesc *fdp);
int	fdlastfile_single(struct filedesc *fdp);
struct	filedesc *fdinit(void);
struct	filedesc *fdshare(struct filedesc *fdp);
struct filedesc_to_leader *
	filedesc_to_leader_alloc(struct filedesc_to_leader *old,
	    struct filedesc *fdp, struct proc *leader);
struct filedesc_to_leader *
	filedesc_to_leader_share(struct filedesc_to_leader *fdtol,
	    struct filedesc *fdp);
int	getvnode(struct thread *td, int fd, cap_rights_t *rightsp,
	    struct file **fpp);
int	getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
	    struct file **fpp);
void	mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
283
/*
 * Descriptor lookup routines; implemented outside this header.
 * NOTE(review): judging by the names, the *_cap variants also report the
 * descriptor's capabilities through 'havecapsp' -- confirm in the
 * implementation.
 */
int	fget_cap_noref(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
	    struct file **fpp, struct filecaps *havecapsp);
int	fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp,
	    struct file **fpp, struct filecaps *havecapsp);
/* Return a referenced file from an unlocked descriptor. */
int	fget_unlocked(struct thread *td, int fd, cap_rights_t *needrightsp,
	    struct file **fpp);
/* Return a file pointer without a ref. FILEDESC_IS_ONLY_USER must be true.  */
int	fget_only_user(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
	    struct file **fpp);
/*
 * Release side of fget_only_user(), which hands out a file pointer without
 * taking a reference: there is nothing to drop, so this expands to
 * assertions only.  It checks that the caller is still the only user of
 * 'fdp' and that 'fp' still has a positive f_count.
 *
 * 'fp' is parenthesized so that any pointer-valued expression can be
 * passed, not just a plain identifier.
 */
#define	fput_only_user(fdp, fp)	({					\
	MPASS(FILEDESC_IS_ONLY_USER(fdp));				\
	MPASS(refcount_load(&(fp)->f_count) > 0);			\
})
298
299/* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. */
300static __inline struct file *
301fget_noref(struct filedesc *fdp, int fd)
302{
303
304	FILEDESC_LOCK_ASSERT(fdp);
305
306	if (__predict_false((u_int)fd >= (u_int)fdp->fd_nfiles))
307		return (NULL);
308
309	return (fdp->fd_ofiles[fd].fde_file);
310}
311
312static __inline struct filedescent *
313fdeget_noref(struct filedesc *fdp, int fd)
314{
315	struct filedescent *fde;
316
317	FILEDESC_LOCK_ASSERT(fdp);
318
319	if (__predict_false((u_int)fd >= (u_int)fdp->fd_nfiles))
320		return (NULL);
321
322	fde = &fdp->fd_ofiles[fd];
323	if (__predict_false(fde->fde_file == NULL))
324		return (NULL);
325
326	return (fde);
327}
328
#ifdef CAPABILITIES
/*
 * Report whether slot 'fd' changed since 'seqc' was sampled: true when the
 * slot's sequence counter is no longer consistent with that value.
 */
static __inline bool
fd_modified(struct filedesc *fdp, int fd, seqc_t seqc)
{
	bool unchanged;

	unchanged = seqc_consistent(fd_seqc(fdp->fd_files, fd), seqc);
	return (!unchanged);
}
#endif
337
/* cdir/rdir/jdir manipulation functions; implemented outside this header. */
struct pwddesc *pdcopy(struct pwddesc *pdp);
void	pdescfree(struct thread *td);
struct pwddesc *pdinit(struct pwddesc *pdp, bool keeplock);
struct pwddesc *pdshare(struct pwddesc *pdp);
void	pdunshare(struct thread *td);

void	pwd_altroot(struct thread *td, struct vnode *altroot_vp);
void	pwd_chdir(struct thread *td, struct vnode *vp);
int	pwd_chroot(struct thread *td, struct vnode *vp);
int	pwd_chroot_chdir(struct thread *td, struct vnode *vp);
void	pwd_ensure_dirs(void);
void	pwd_set_rootvnode(void);

/*
 * NOTE(review): by their names the pwd_hold*() routines acquire a reference
 * on a struct pwd and pwd_drop() releases one -- confirm in the
 * implementation.
 */
struct pwd *pwd_hold_pwddesc(struct pwddesc *pdp);
bool	pwd_hold_smr(struct pwd *pwd);
struct pwd *pwd_hold_proc(struct proc *p);
struct pwd *pwd_hold(struct thread *td);
void	pwd_drop(struct pwd *pwd);
/*
 * Store 'newpwd' as the struct pwd of 'pdp' using a serialized SMR store.
 * The caller must hold the pwddesc lock; ownership is asserted as part of
 * the store.
 */
static inline void
pwd_set(struct pwddesc *pdp, struct pwd *newpwd)
{
	smr_serialized_store(&pdp->pd_pwd, newpwd,
	    (PWDDESC_ASSERT_XLOCKED(pdp), true));
}
/*
 * Load the current process's struct pwd pointer without taking a reference.
 * NOTE(review): vfs_smr_entered_load() suggests the caller must already be
 * inside a VFS SMR section -- confirm.
 */
#define	pwd_get_smr()	vfs_smr_entered_load(&curproc->p_pd->pd_pwd)
364
365#endif /* _KERNEL */
366
367#endif /* !_SYS_FILEDESC_H_ */
368