1/*
2 * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28/*
 * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
30 */
31
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35#include <stdbool.h>
36#include <fcntl.h>
37#include <errno.h>
38#include <assert.h>
39#include <sys/types.h>
40#include <sys/stat.h>
41#include <sys/mount.h>
42#include <sys/param.h>
43#include <sys/queue.h>
44#include <sys/socket.h>
45#include <sys/un.h>
46#include <dirent.h>
47#include <pwd.h>
48#include <grp.h>
49#include <libgen.h>
50#include <pthread.h>
51#include "../lib9p.h"
52#include "../lib9p_impl.h"
53#include "../fid.h"
54#include "../log.h"
55#include "../rfuncs.h"
56#include "../genacl.h"
57#include "backend.h"
58#include "fs.h"
59
60#if defined(WITH_CASPER)
61  #include <libcasper.h>
62  #include <casper/cap_pwd.h>
63  #include <casper/cap_grp.h>
64#endif
65
66#if defined(__FreeBSD__)
67  #include <sys/param.h>
68  #if __FreeBSD_version >= 1000000
69    #define	HAVE_BINDAT
70  #endif
71#endif
72
73#if defined(__FreeBSD__)
74  #define	HAVE_BIRTHTIME
75#endif
76
77#if defined(__APPLE__)
78  #include <sys/syscall.h>
79  #include "Availability.h"
80  #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED
81#endif
82
/*
 * Per-backend state, passed to every backend operation as the
 * opaque <softc> pointer.
 */
struct fs_softc {
	/*
	 * Descriptor for the exported root directory: fs_attach()
	 * fstat()s it and hands it to the root fid as its dirfd.
	 */
	int 	fs_rootfd;
	/* When set, fs_nde() refuses to create entries (EROFS). */
	bool	fs_readonly;
#if defined(WITH_CASPER)
	/* Casper channels used for passwd and group lookups. */
	cap_channel_t *fs_cappwd;
	cap_channel_t *fs_capgrp;
#endif
};
91
/*
 * Per-fid backend data, hung off struct l9p_fid's lo_aux pointer.
 * Allocated by open_fid().
 */
struct fs_fid {
	DIR	*ff_dir;	/* directory stream, closed in fs_clunk() */
	int	ff_dirfd;	/* dirfd that ff_name is resolved against */
	int	ff_fd;		/* open file descriptor, or -1 if none */
	int	ff_flags;	/* FF_* flags (see below) */
	char	*ff_name;	/* path name, strdup()ed by open_fid() */
	struct fs_authinfo *ff_ai; /* authinfo; open_fid() takes a reference */
	/*
	 * Initialized in open_fid().
	 * NOTE(review): what this mutex protects is not visible in this
	 * part of the file -- confirm against its lock/unlock sites.
	 */
	pthread_mutex_t ff_mtx;
	struct l9p_acl *ff_acl; /* cached ACL if any */
};
102
103#define	FF_NO_NFSV4_ACL	0x01	/* don't go looking for NFSv4 ACLs */
104/*	FF_NO_POSIX_ACL	0x02	-- not yet */
105
106/*
107 * Our authinfo consists of:
108 *
109 *  - a reference count
110 *  - a uid
111 *  - a gid-set
112 *
 * The "default" gid is the first gid in the gid-set, provided the
114 * set size is at least 1.  The set-size may be zero, though.
115 *
116 * Adjustments to the ref-count must be atomic, once it's shared.
117 * It would be nice to use C11 atomics here but they are not common
118 * enough to all systems just yet; for now, we use a mutex.
119 *
120 * Note that some ops (Linux style ones) pass an effective gid for
121 * the op, in which case, that gid may override.  To achieve this
122 * effect, permissions testing functions also take an extra gid.
123 * If this gid is (gid_t)-1 it is not used and only the remaining
124 * gids take part.
125 *
126 * The uid may also be (uid_t)-1, meaning "no uid was available
127 * at all at attach time".  In this case, new files inherit parent
128 * directory uids.
129 *
130 * The refcount is simply the number of "openfile"s using this
131 * authinfo (so that when the last ref goes away, we can free it).
132 *
133 * There are also master ACL flags (same as in ff_flags).
134 */
/*
 * Authentication info shared by the fids created from one attach.
 * Allocated in fs_attach() with room for ai_ngids trailing gids.
 */
struct fs_authinfo {
	pthread_mutex_t ai_mtx;	/* lock for refcnt */
	uint32_t ai_refcnt;	/* incremented by open_fid() under ai_mtx */
	int	ai_flags;	/* master ACL flags; dropacl() copies to ff_flags */
	uid_t	ai_uid;		/* attaching user's uid */
	int	ai_ngids;	/* number of entries in ai_gids (may be 0) */
	gid_t	ai_gids[];	/* NB: flexible array member */
};
143
144/*
145 * We have a global-static mutex for single-threading Tattach
146 * requests, which use getpwnam (and indirectly, getgr* functions)
147 * which are not reentrant.
148 */
149static bool fs_attach_mutex_inited;
150static pthread_mutex_t fs_attach_mutex;
151
152/*
153 * Internal functions (except inline functions).
154 */
155static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *);
156static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *);
157static int fs_buildname(struct l9p_fid *, char *, char *, size_t);
158static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t,
159    struct stat *st);
160static int fs_dpf(char *, char *, size_t);
161static int fs_oflags_dotu(int, int *);
162static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *);
163static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t,
164    struct stat *, uid_t *, gid_t *);
165static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool);
166static void dostat(struct fs_softc *, struct l9p_stat *, char *,
167    struct stat *, bool dotu);
168static void dostatfs(struct l9p_statfs *, struct statfs *, long);
169static void fillacl(struct fs_fid *ff);
170static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path);
171static void dropacl(struct fs_fid *ff);
172static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd,
173    const char *path);
174static int check_access(int32_t,
175    struct l9p_acl *, struct stat *, struct l9p_acl *, struct stat *,
176    struct fs_authinfo *, gid_t);
177static void generate_qid(struct stat *, struct l9p_qid *);
178
179static int fs_icreate(void *, struct l9p_fid *, char *, int,
180    bool, mode_t, gid_t, struct stat *);
181static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode,
182    gid_t, struct stat *);
183static int fs_imkdir(void *, struct l9p_fid *, char *,
184    bool, mode_t, gid_t, struct stat *);
185static int fs_imkfifo(void *, struct l9p_fid *, char *,
186    bool, mode_t, gid_t, struct stat *);
187static int fs_imknod(void *, struct l9p_fid *, char *,
188    bool, mode_t, dev_t, gid_t, struct stat *);
189static int fs_imksocket(void *, struct l9p_fid *, char *,
190    bool, mode_t, gid_t, struct stat *);
191static int fs_isymlink(void *, struct l9p_fid *, char *, char *,
192    gid_t, struct stat *);
193
194/*
195 * Internal functions implementing backend.
196 */
197static int fs_attach(void *, struct l9p_request *);
198static int fs_clunk(void *, struct l9p_fid *);
199static int fs_create(void *, struct l9p_request *);
200static int fs_open(void *, struct l9p_request *);
201static int fs_read(void *, struct l9p_request *);
202static int fs_remove(void *, struct l9p_fid *);
203static int fs_stat(void *, struct l9p_request *);
204static int fs_walk(void *, struct l9p_request *);
205static int fs_write(void *, struct l9p_request *);
206static int fs_wstat(void *, struct l9p_request *);
207static int fs_statfs(void *, struct l9p_request *);
208static int fs_lopen(void *, struct l9p_request *);
209static int fs_lcreate(void *, struct l9p_request *);
210static int fs_symlink(void *, struct l9p_request *);
211static int fs_mknod(void *, struct l9p_request *);
212static int fs_rename(void *, struct l9p_request *);
213static int fs_readlink(void *, struct l9p_request *);
214static int fs_getattr(void *, struct l9p_request *);
215static int fs_setattr(void *, struct l9p_request *);
216static int fs_xattrwalk(void *, struct l9p_request *);
217static int fs_xattrcreate(void *, struct l9p_request *);
218static int fs_readdir(void *, struct l9p_request *);
219static int fs_fsync(void *, struct l9p_request *);
220static int fs_lock(void *, struct l9p_request *);
221static int fs_getlock(void *, struct l9p_request *);
222static int fs_link(void *, struct l9p_request *);
223static int fs_renameat(void *, struct l9p_request *);
224static int fs_unlinkat(void *, struct l9p_request *);
225static void fs_freefid(void *, struct l9p_fid *);
226
227/*
228 * Convert from 9p2000 open/create mode to Unix-style O_* flags.
229 * This includes 9p2000.u extensions, but not 9p2000.L protocol,
230 * which has entirely different open, create, etc., flag bits.
231 *
232 * The <mode> given here is the one-byte (uint8_t) "mode"
233 * argument to Tcreate or Topen, so it can have at most 8 bits.
234 *
235 * https://swtch.com/plan9port/man/man9/open.html and
236 * http://plan9.bell-labs.com/magic/man2html/5/open
237 * both say:
238 *
239 *   The [low two bits of the] mode field determines the
240 *   type of I/O ... [I]f mode has the OTRUNC (0x10) bit
241 *   set, the file is to be truncated, which requires write
242 *   permission ...; if the mode has the ORCLOSE (0x40) bit
243 *   set, the file is to be removed when the fid is clunked,
244 *   which requires permission to remove the file from its
245 *   directory.  All other bits in mode should be zero.  It
246 *   is illegal to write a directory, truncate it, or
247 *   attempt to remove it on close.
248 *
249 * 9P2000.u may add ODIRECT (0x80); this is not completely clear.
250 * The fcall.h header defines OCEXEC (0x20) as well, but it makes
251 * no sense to send this to a server.  There seem to be no bits
252 * 0x04 and 0x08.
253 *
254 * We always turn on O_NOCTTY since as a server, we never want
255 * to gain a controlling terminal.  We always turn on O_NOFOLLOW
256 * for reasons described elsewhere.
257 */
258static int
259fs_oflags_dotu(int mode, int *aflags)
260{
261	int flags;
262#define	CONVERT(theirs, ours) \
263	do { \
264		if (mode & (theirs)) { \
265			mode &= ~(theirs); \
266			flags |= ours; \
267		} \
268	} while (0)
269
270	switch (mode & L9P_OACCMODE) {
271
272	case L9P_OREAD:
273	default:
274		flags = O_RDONLY;
275		break;
276
277	case L9P_OWRITE:
278		flags = O_WRONLY;
279		break;
280
281	case L9P_ORDWR:
282		flags = O_RDWR;
283		break;
284
285	case L9P_OEXEC:
286		if (mode & L9P_OTRUNC)
287			return (EINVAL);
288		flags = O_RDONLY;
289		break;
290	}
291
292	flags |= O_NOCTTY | O_NOFOLLOW;
293
294	CONVERT(L9P_OTRUNC, O_TRUNC);
295
296	/*
297	 * Now take away some flags locally:
298	 *   the access mode (already translated)
299	 *   ORCLOSE - caller only
300	 *   OCEXEC - makes no sense in server
301	 *   ODIRECT - not applicable here
302	 * If there are any flag bits left after this,
303	 * we were unable to translate them.  For now, let's
304	 * treat this as EINVAL so that we can catch problems.
305	 */
306	mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT);
307	if (mode != 0) {
308		L9P_LOG(L9P_INFO,
309		    "fs_oflags_dotu: untranslated bits: %#x",
310		    (unsigned)mode);
311		return (EINVAL);
312	}
313
314	*aflags = flags;
315	return (0);
316#undef CONVERT
317}
318
319/*
320 * Convert from 9P2000.L (Linux) open mode bits to O_* flags.
321 * See fs_oflags_dotu above.
322 *
323 * Linux currently does not have open-for-exec, but there is a
324 * proposal for it using O_PATH|O_NOFOLLOW, now handled here.
325 *
326 * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE.
327 */
328static int
329fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9)
330{
331	int flags;
332	enum l9p_omode p9;
333#define	CLEAR(theirs)	l_mode &= ~(uint32_t)(theirs)
334#define	CONVERT(theirs, ours) \
335	do { \
336		if (l_mode & (theirs)) { \
337			CLEAR(theirs); \
338			flags |= ours; \
339		} \
340	} while (0)
341
342	/*
343	 * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS.
344	 */
345	flags = l_mode & O_ACCMODE;
346	if (flags == 3)
347		return (EINVAL);
348	CLEAR(O_ACCMODE);
349
350	if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) ==
351		    (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) {
352		CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW);
353		p9 = L9P_OEXEC;
354	} else {
355		/*
356		 * Slightly dirty, but same dirt, really, as
357		 * setting flags from l_mode & O_ACCMODE.
358		 */
359		p9 = (enum l9p_omode)flags;	/* slightly dirty */
360	}
361
362	/* turn L_O_TMPFILE into L9P_ORCLOSE in *p9? */
363	if (l_mode & L9P_L_O_TRUNC)
364		p9 |= L9P_OTRUNC;	/* but don't CLEAR yet */
365
366	flags |= O_NOCTTY | O_NOFOLLOW;
367
368	/*
369	 * L_O_CREAT seems to be noise, since we get separate open
370	 * and create.  But it is actually set sometimes.  We just
371	 * throw it out here; create ops must set it themselves and
372	 * open ops have no permissions bits and hence cannot create.
373	 *
374	 * L_O_EXCL does make sense on create ops, i.e., we can
375	 * take a create op with or without L_O_EXCL.  We pass that
376	 * through.
377	 */
378	CLEAR(L9P_L_O_CREAT);
379	CONVERT(L9P_L_O_EXCL, O_EXCL);
380	CONVERT(L9P_L_O_TRUNC, O_TRUNC);
381	CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY);
382	CONVERT(L9P_L_O_APPEND, O_APPEND);
383	CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK);
384
385	/*
386	 * Discard these as useless noise at our (server) end.
387	 * (NOATIME might be useful but we can only set it on a
388	 * per-mount basis.)
389	 */
390	CLEAR(L9P_L_O_CLOEXEC);
391	CLEAR(L9P_L_O_DIRECT);
392	CLEAR(L9P_L_O_DSYNC);
393	CLEAR(L9P_L_O_FASYNC);
394	CLEAR(L9P_L_O_LARGEFILE);
395	CLEAR(L9P_L_O_NOATIME);
396	CLEAR(L9P_L_O_NOCTTY);
397	CLEAR(L9P_L_O_NOFOLLOW);
398	CLEAR(L9P_L_O_SYNC);
399
400	if (l_mode != 0) {
401		L9P_LOG(L9P_INFO,
402		    "fs_oflags_dotl: untranslated bits: %#x",
403		    (unsigned)l_mode);
404		return (EINVAL);
405	}
406
407	*aflags = flags;
408	*ap9 = p9;
409	return (0);
410#undef CLEAR
411#undef CONVERT
412}
413
/*
 * Look up the passwd entry for <uid>, storing results via <pg>.
 * Goes through the Casper passwd channel when built WITH_CASPER,
 * otherwise calls r_getpwuid() directly (and <sc> is unused).
 */
static struct passwd *
fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg)
{
	struct passwd *pw;

#if defined(WITH_CASPER)
	pw = r_cap_getpwuid(sc->fs_cappwd, uid, pg);
#else
	(void)sc;
	pw = r_getpwuid(uid, pg);
#endif
	return (pw);
}
424
/*
 * Look up the group entry for <gid>, storing results via <pg>.
 * Goes through the Casper group channel when built WITH_CASPER,
 * otherwise calls r_getgrgid() directly (and <sc> is unused).
 */
static struct group *
fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg)
{
	struct group *gr;

#if defined(WITH_CASPER)
	gr = r_cap_getgrgid(sc->fs_capgrp, gid, pg);
#else
	(void)sc;
	gr = r_getgrgid(gid, pg);
#endif
	return (gr);
}
435
436/*
437 * Build full name of file by appending given name to directory name.
438 */
439static int
440fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size)
441{
442	struct fs_fid *dirf = dir->lo_aux;
443	size_t dlen, nlen1;
444
445	assert(dirf != NULL);
446	dlen = strlen(dirf->ff_name);
447	nlen1 = strlen(name) + 1;	/* +1 for '\0' */
448	if (dlen + 1 + nlen1 > size)
449		return (ENAMETOOLONG);
450	memcpy(buf, dirf->ff_name, dlen);
451	buf[dlen] = '/';
452	memcpy(buf + dlen + 1, name, nlen1);
453	return (0);
454}
455
456/*
457 * Build parent name of file by splitting it off.  Return an error
458 * if the given fid represents the root, so that there is no such
459 * parent, or if the discovered parent is not a directory.
460 */
461static int
462fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf,
463    size_t size, struct stat *st)
464{
465	struct fs_fid *ff;
466	char *path;
467
468	ff = fid->lo_aux;
469	assert(ff != NULL);
470	path = ff->ff_name;
471	path = r_dirname(path, buf, size);
472	if (path == NULL)
473		return (ENAMETOOLONG);
474	if (fstatat(ff->ff_dirfd, path, st, AT_SYMLINK_NOFOLLOW) != 0)
475		return (errno);
476	if (!S_ISDIR(st->st_mode))
477		return (ENOTDIR);
478	return (0);
479}
480
481/*
482 * Like fs_buildname() but for adding a file name to a buffer
483 * already holding a directory name.  Essentially does
484 *     strcat(dbuf, "/");
485 *     strcat(dbuf, fname);
486 * but with size checking and an ENAMETOOLONG error as needed.
487 *
488 * (Think of the function name as "directory plus-equals file".)
489 */
static int
fs_dpf(char *dbuf, char *fname, size_t size)
{
	const size_t dirlen = strlen(dbuf);
	const size_t namelen = strlen(fname);
	char *tail;

	/* Room needed: directory, '/', file name, terminating '\0'. */
	if (dirlen + 1 + namelen + 1 > size)
		return (ENAMETOOLONG);

	tail = dbuf + dirlen;
	*tail++ = '/';
	memcpy(tail, fname, namelen + 1);	/* copies the '\0' too */
	return (0);
}
503
504/*
505 * Prepare to create a new directory entry (open with O_CREAT,
506 * mkdir, etc -- any operation that creates a new inode),
507 * operating in parent data <dir>, based on authinfo <ai> and
508 * effective gid <egid>.
509 *
510 * The new entity should be owned by user/group <*nuid, *ngid>,
511 * if it's really a new entity.  It will be a directory if isdir.
512 *
513 * Returns an error number if the entry should not be created
514 * (e.g., read-only file system or no permission to write in
515 * parent directory).  Always sets *nuid and *ngid on success:
516 * in the worst case, when there is no available ID, this will
517 * use the parent directory's IDs.  Fills in <*st> on success.
518 */
519static int
520fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid,
521    struct stat *st, uid_t *nuid, gid_t *ngid)
522{
523	struct fs_fid *dirf;
524	struct fs_authinfo *ai;
525	int32_t op;
526	int error;
527
528	if (sc->fs_readonly)
529		return (EROFS);
530	dirf = dir->lo_aux;
531	assert(dirf != NULL);
532	if (fstatat(dirf->ff_dirfd, dirf->ff_name, st,
533	    AT_SYMLINK_NOFOLLOW) != 0)
534		return (errno);
535	if (!S_ISDIR(st->st_mode))
536		return (ENOTDIR);
537	dirf = dir->lo_aux;
538	ai = dirf->ff_ai;
539	fillacl(dirf);
540	op = isdir ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
541	error = check_access(op, dirf->ff_acl, st, NULL, NULL, ai, egid);
542	if (error)
543		return (EPERM);
544
545	*nuid = ai->ai_uid != (uid_t)-1 ? ai->ai_uid : st->st_uid;
546	*ngid = egid != (gid_t)-1 ? egid :
547	    ai->ai_ngids > 0 ?  ai->ai_gids[0] : st->st_gid;
548	return (0);
549}
550
551/*
552 * Allocate new open-file data structure to attach to a fid.
553 *
554 * The new file's authinfo is the same as the old one's, and
555 * we gain a reference.
556 */
557static struct fs_fid *
558open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating)
559{
560	struct fs_fid *ret;
561	uint32_t newcount;
562	int error;
563
564	ret = l9p_calloc(1, sizeof(*ret));
565	error = pthread_mutex_init(&ret->ff_mtx, NULL);
566	if (error) {
567		free(ret);
568		return (NULL);
569	}
570	ret->ff_fd = -1;
571	ret->ff_dirfd = dirfd;
572	ret->ff_name = strdup(path);
573	if (ret->ff_name == NULL) {
574		pthread_mutex_destroy(&ret->ff_mtx);
575		free(ret);
576		return (NULL);
577	}
578	pthread_mutex_lock(&ai->ai_mtx);
579	newcount = ++ai->ai_refcnt;
580	pthread_mutex_unlock(&ai->ai_mtx);
581	/*
582	 * If we just incremented the count to 1, we're the *first*
583	 * reference.  This is only allowed when creating the authinfo,
584	 * otherwise it means something has gone wrong.  This cannot
585	 * catch every bad (re)use of a freed authinfo but it may catch
586	 * a few.
587	 */
588	assert(newcount > 1 || creating);
589	L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
590	    (void *)ai, (u_long)newcount);
591	ret->ff_ai = ai;
592	return (ret);
593}
594
595static void
596dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
597    struct stat *buf, bool dotu)
598{
599	struct passwd *user;
600	struct group *group;
601
602	memset(s, 0, sizeof(struct l9p_stat));
603
604	generate_qid(buf, &s->qid);
605
606	s->type = 0;
607	s->dev = 0;
608	s->mode = buf->st_mode & 0777;
609
610	if (S_ISDIR(buf->st_mode))
611		s->mode |= L9P_DMDIR;
612
613	if (S_ISLNK(buf->st_mode) && dotu)
614		s->mode |= L9P_DMSYMLINK;
615
616	if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode))
617		s->mode |= L9P_DMDEVICE;
618
619	if (S_ISSOCK(buf->st_mode))
620		s->mode |= L9P_DMSOCKET;
621
622	if (S_ISFIFO(buf->st_mode))
623		s->mode |= L9P_DMNAMEDPIPE;
624
625	s->atime = (uint32_t)buf->st_atime;
626	s->mtime = (uint32_t)buf->st_mtime;
627	s->length = (uint64_t)buf->st_size;
628
629	s->name = r_basename(name, NULL, 0);
630
631	if (!dotu) {
632		struct r_pgdata udata, gdata;
633
634		user = fs_getpwuid(sc, buf->st_uid, &udata);
635		group = fs_getgrgid(sc, buf->st_gid, &gdata);
636		s->uid = user != NULL ? strdup(user->pw_name) : NULL;
637		s->gid = group != NULL ? strdup(group->gr_name) : NULL;
638		s->muid = user != NULL ? strdup(user->pw_name) : NULL;
639		r_pgfree(&udata);
640		r_pgfree(&gdata);
641	} else {
642		/*
643		 * When using 9P2000.u, we don't need to bother about
644		 * providing user and group names in textual form.
645		 *
646		 * NB: if the asprintf()s fail, s->extension should
647		 * be unset so we can ignore these.
648		 */
649		s->n_uid = buf->st_uid;
650		s->n_gid = buf->st_gid;
651		s->n_muid = buf->st_uid;
652
653		if (S_ISLNK(buf->st_mode)) {
654			char target[MAXPATHLEN];
655			ssize_t ret = readlink(name, target, MAXPATHLEN);
656
657			if (ret < 0) {
658				s->extension = NULL;
659				return;
660			}
661
662			s->extension = strndup(target, (size_t)ret);
663		}
664
665		if (S_ISBLK(buf->st_mode)) {
666			asprintf(&s->extension, "b %d %d", major(buf->st_rdev),
667			    minor(buf->st_rdev));
668		}
669
670		if (S_ISCHR(buf->st_mode)) {
671			asprintf(&s->extension, "c %d %d", major(buf->st_rdev),
672			    minor(buf->st_rdev));
673		}
674	}
675}
676
677static void dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen)
678{
679
680	out->type = L9P_FSTYPE;
681	out->bsize = in->f_bsize;
682	out->blocks = in->f_blocks;
683	out->bfree = in->f_bfree;
684	out->bavail = in->f_bavail;
685	out->files = in->f_files;
686	out->ffree = in->f_ffree;
687	out->namelen = (uint32_t)namelen;
688	out->fsid = ((uint64_t)in->f_fsid.val[0] << 32) |
689	    (uint64_t)in->f_fsid.val[1];
690}
691
692static void
693generate_qid(struct stat *buf, struct l9p_qid *qid)
694{
695	qid->path = buf->st_ino;
696	qid->version = 0;
697
698	if (S_ISREG(buf->st_mode))
699		qid->type |= L9P_QTFILE;
700
701	if (S_ISDIR(buf->st_mode))
702		qid->type |= L9P_QTDIR;
703
704	if (S_ISLNK(buf->st_mode))
705		qid->type |= L9P_QTSYMLINK;
706}
707
708/*
709 * Fill in ff->ff_acl if it's not set yet.  Skip if the "don't use
710 * ACLs" flag is set, and use the flag to remember failure so
711 * we don't bother retrying either.
712 */
713static void
714fillacl(struct fs_fid *ff)
715{
716
717	if (ff->ff_acl == NULL && (ff->ff_flags & FF_NO_NFSV4_ACL) == 0) {
718		ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name);
719		if (ff->ff_acl == NULL)
720			ff->ff_flags |= FF_NO_NFSV4_ACL;
721	}
722}
723
724/*
725 * Get an ACL given fd and/or path name.  We check for the "don't get
726 * ACL" flag in the given ff_fid data structure first, but don't set
727 * the flag here.  The fillacl() code is similar but will set the
728 * flag; it also uses the ff_fd and ff_name directly.
729 *
730 * (This is used to get ACLs for parent directories, for instance.)
731 */
732static struct l9p_acl *
733getacl(struct fs_fid *ff, int fd, const char *path)
734{
735
736	if (ff->ff_flags & FF_NO_NFSV4_ACL)
737		return (NULL);
738	return look_for_nfsv4_acl(ff, fd, path);
739}
740
741/*
742 * Drop cached ff->ff_acl, e.g., after moving from one directory to
743 * another, where inherited ACLs might change.
744 */
745static void
746dropacl(struct fs_fid *ff)
747{
748
749	l9p_acl_free(ff->ff_acl);
750	ff->ff_acl = NULL;
751	ff->ff_flags = ff->ff_ai->ai_flags;
752}
753
754/*
755 * Check to see if we can find NFSv4 ACLs for the given file.
756 * If we have an open fd, we can use that, otherwise we need
757 * to use the path.
758 */
759static struct l9p_acl *
760look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path)
761{
762	struct l9p_acl *acl;
763	acl_t sysacl;
764	int doclose = 0;
765
766	if (fd < 0) {
767		fd = openat(ff->ff_dirfd, path, 0);
768		doclose = 1;
769	}
770
771	sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4);
772	if (sysacl == NULL) {
773		/*
774		 * EINVAL means no NFSv4 ACLs apply for this file.
775		 * Other error numbers indicate some kind of problem.
776		 */
777		if (errno != EINVAL) {
778			L9P_LOG(L9P_ERROR,
779			    "error retrieving NFSv4 ACL from "
780			    "fdesc %d (%s): %s", fd,
781			    path, strerror(errno));
782		}
783
784		if (doclose)
785			close(fd);
786
787		return (NULL);
788	}
789#if defined(HAVE_FREEBSD_ACLS)
790	acl = l9p_freebsd_nfsv4acl_to_acl(sysacl);
791#else
792	acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */
793#endif
794	acl_free(sysacl);
795
796	if (doclose)
797		close(fd);
798
799	return (acl);
800}
801
802/*
803 * Verify that the user whose authinfo is in <ai> and effective
804 * group ID is <egid> ((gid_t)-1 means no egid supplied) has
805 * permission to do something.
806 *
807 * The "something" may be rather complex: we allow NFSv4 style
808 * operation masks here, and provide parent and child ACLs and
809 * stat data.  At most one of pacl+pst and cacl+cst can be NULL,
810 * unless ACLs are not supported; then pacl and cacl can both
811 * be NULL but pst or cst must be non-NULL depending on the
812 * operation.
813 */
814static int
815check_access(int32_t opmask,
816    struct l9p_acl *pacl, struct stat *pst,
817    struct l9p_acl *cacl, struct stat *cst,
818    struct fs_authinfo *ai, gid_t egid)
819{
820	struct l9p_acl_check_args args;
821
822	/*
823	 * If we have ACLs, use them exclusively, ignoring Unix
824	 * permissions.  Otherwise, fall back on stat st_mode
825	 * bits, and allow super-user as well.
826	 */
827	args.aca_uid = ai->ai_uid;
828	args.aca_gid = egid;
829	args.aca_groups = ai->ai_gids;
830	args.aca_ngroups = (size_t)ai->ai_ngids;
831	args.aca_parent = pacl;
832	args.aca_pstat = pst;
833	args.aca_child = cacl;
834	args.aca_cstat = cst;
835	args.aca_aclmode = pacl == NULL && cacl == NULL
836	    ? L9P_ACM_STAT_MODE
837	    : L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL;
838
839	args.aca_superuser = true;
840	return (l9p_acl_check_access(opmask, &args));
841}
842
843static int
844fs_attach(void *softc, struct l9p_request *req)
845{
846	struct fs_authinfo *ai;
847	struct fs_softc *sc = (struct fs_softc *)softc;
848	struct fs_fid *file;
849	struct passwd *pwd;
850	struct stat st;
851	struct r_pgdata udata;
852	uint32_t n_uname;
853	gid_t *gids;
854	uid_t uid;
855	int error;
856	int ngroups;
857
858	assert(req->lr_fid != NULL);
859
860	/*
861	 * Single-thread pwd/group related items.  We have a reentrant
862	 * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist
863	 * may use non-reentrant C library getgr* routines.
864	 */
865	pthread_mutex_lock(&fs_attach_mutex);
866
867	n_uname = req->lr_req.tattach.n_uname;
868	if (n_uname != L9P_NONUNAME) {
869		uid = (uid_t)n_uname;
870		pwd = fs_getpwuid(sc, uid, &udata);
871		if (pwd == NULL)
872			L9P_LOG(L9P_DEBUG,
873			    "Tattach: uid %ld: no such user", (long)uid);
874	} else {
875		uid = (uid_t)-1;
876#if defined(WITH_CASPER)
877		pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname);
878#else
879		pwd = getpwnam(req->lr_req.tattach.uname);
880#endif
881		if (pwd == NULL)
882			L9P_LOG(L9P_DEBUG,
883			    "Tattach: %s: no such user",
884			    req->lr_req.tattach.uname);
885	}
886
887	/*
888	 * If caller didn't give a numeric UID, pick it up from pwd
889	 * if possible.  If that doesn't work we can't continue.
890	 *
891	 * Note that pwd also supplies the group set.  This assumes
892	 * the server has the right mapping; this needs improvement.
893	 * We do at least support ai->ai_ngids==0 properly now though.
894	 */
895	if (uid == (uid_t)-1 && pwd != NULL)
896		uid = pwd->pw_uid;
897	if (uid == (uid_t)-1)
898		error = EPERM;
899	else {
900		error = 0;
901		if (fstat(sc->fs_rootfd, &st) != 0)
902			error = errno;
903		else if (!S_ISDIR(st.st_mode))
904			error = ENOTDIR;
905	}
906	if (error) {
907		pthread_mutex_unlock(&fs_attach_mutex);
908		L9P_LOG(L9P_DEBUG,
909		    "Tattach: denying uid=%ld access to rootdir: %s",
910		    (long)uid, strerror(error));
911		/*
912		 * Pass ENOENT and ENOTDIR through for diagnosis;
913		 * others become EPERM.  This should not leak too
914		 * much security.
915		 */
916		return (error == ENOENT || error == ENOTDIR ? error : EPERM);
917	}
918
919	if (pwd != NULL) {
920		/*
921		 * This either succeeds and fills in ngroups and
922		 * returns non-NULL, or fails and sets ngroups to 0
923		 * and returns NULL.  Either way ngroups is correct.
924		 */
925		gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups);
926	} else {
927		gids = NULL;
928		ngroups = 0;
929	}
930
931	/*
932	 * Done with pwd and group related items that may use
933	 * non-reentrant C library routines; allow other threads in.
934	 */
935	pthread_mutex_unlock(&fs_attach_mutex);
936
937	ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t));
938	if (ai == NULL) {
939		free(gids);
940		return (ENOMEM);
941	}
942	error = pthread_mutex_init(&ai->ai_mtx, NULL);
943	if (error) {
944		free(gids);
945		free(ai);
946		return (error);
947	}
948	ai->ai_refcnt = 0;
949	ai->ai_uid = uid;
950	ai->ai_flags = 0;	/* XXX for now */
951	ai->ai_ngids = ngroups;
952	memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t));
953	free(gids);
954
955	file = open_fid(sc->fs_rootfd, ".", ai, true);
956	if (file == NULL) {
957		pthread_mutex_destroy(&ai->ai_mtx);
958		free(ai);
959		return (ENOMEM);
960	}
961
962	req->lr_fid->lo_aux = file;
963	generate_qid(&st, &req->lr_resp.rattach.qid);
964	return (0);
965}
966
967static int
968fs_clunk(void *softc __unused, struct l9p_fid *fid)
969{
970	struct fs_fid *file;
971
972	file = fid->lo_aux;
973	assert(file != NULL);
974
975	if (file->ff_dir) {
976		closedir(file->ff_dir);
977		file->ff_dir = NULL;
978	} else if (file->ff_fd != -1) {
979		close(file->ff_fd);
980		file->ff_fd = -1;
981	}
982
983	return (0);
984}
985
986/*
987 * Create ops.
988 *
989 * We are to create a new file under some existing path,
990 * where the new file's name is in the Tcreate request and the
991 * existing path is due to a fid-based file (req->lr_fid).
992 *
993 * One op (create regular file) sets file->fd, the rest do not.
994 */
995static int
996fs_create(void *softc, struct l9p_request *req)
997{
998	struct l9p_fid *dir;
999	struct stat st;
1000	uint32_t dmperm;
1001	mode_t perm;
1002	char *name;
1003	int error;
1004
1005	dir = req->lr_fid;
1006	name = req->lr_req.tcreate.name;
1007	dmperm = req->lr_req.tcreate.perm;
1008	perm = (mode_t)(dmperm & 0777);
1009
1010	if (dmperm & L9P_DMDIR)
1011		error = fs_imkdir(softc, dir, name, true,
1012		    perm, (gid_t)-1, &st);
1013	else if (dmperm & L9P_DMSYMLINK)
1014		error = fs_isymlink(softc, dir, name,
1015		    req->lr_req.tcreate.extension, (gid_t)-1, &st);
1016	else if (dmperm & L9P_DMNAMEDPIPE)
1017		error = fs_imkfifo(softc, dir, name, true,
1018		    perm, (gid_t)-1, &st);
1019	else if (dmperm & L9P_DMSOCKET)
1020		error = fs_imksocket(softc, dir, name, true,
1021		    perm, (gid_t)-1, &st);
1022	else if (dmperm & L9P_DMDEVICE) {
1023		unsigned int major, minor;
1024		char type;
1025		dev_t dev;
1026
1027		/*
1028		 * ??? Should this be testing < 3?  For now, allow a single
1029		 * integer mode with minor==0 implied.
1030		 */
1031		minor = 0;
1032		if (sscanf(req->lr_req.tcreate.extension, "%c %u %u",
1033		    &type, &major, &minor) < 2) {
1034			return (EINVAL);
1035		}
1036
1037		switch (type) {
1038		case 'b':
1039			perm |= S_IFBLK;
1040			break;
1041		case 'c':
1042			perm |= S_IFCHR;
1043			break;
1044		default:
1045			return (EINVAL);
1046		}
1047		dev = makedev(major, minor);
1048		error = fs_imknod(softc, dir, name, true, perm, dev,
1049		    (gid_t)-1, &st);
1050	} else {
1051		enum l9p_omode p9;
1052		int flags;
1053
1054		p9 = req->lr_req.tcreate.mode;
1055		error = fs_oflags_dotu(p9, &flags);
1056		if (error)
1057			return (error);
1058		error = fs_icreate(softc, dir, name, flags,
1059		    true, perm, (gid_t)-1, &st);
1060		req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
1061	}
1062
1063	if (error == 0)
1064		generate_qid(&st, &req->lr_resp.rcreate.qid);
1065
1066	return (error);
1067}
1068
1069/*
1070 * https://swtch.com/plan9port/man/man9/open.html and
1071 * http://plan9.bell-labs.com/magic/man2html/5/open
1072 * say that permissions are actually
1073 *     perm & (~0666 | (dir.perm & 0666))
1074 * for files, and
1075 *     perm & (~0777 | (dir.perm & 0777))
1076 * for directories.  That is, the parent directory may
1077 * take away permissions granted by the operation.
1078 *
1079 * This seems a bit restrictive; probably
1080 * there should be a control knob for this.
1081 */
static inline mode_t
fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
{
	/*
	 * Bits covered by <inherit> survive only if the parent
	 * directory also has them; every other bit of <perm> passes
	 * through untouched.  Directories inherit rwx, files only rw.
	 */
	mode_t inherit = isdir ? 0777 : 0666;

	return (perm & (~inherit | (dir_perm & inherit)));
}
1092
1093/*
1094 * Internal form of create (plain file).
1095 *
1096 * Our caller takes care of splitting off all the special
1097 * types of create (mknod, etc), so this is purely for files.
1098 * We receive the fs_softc <softc>, the directory fid <dir>
1099 * in which the new file is to be created, the name of the
1100 * new file, a flag <isp9> indicating whether to do plan9 style
1101 * permissions or Linux style permissions, the permissions <perm>,
1102 * an effective group id <egid>, and a pointer to a stat structure
1103 * <st> to fill in describing the final result on success.
1104 *
1105 * On successful create, the fid switches to the newly created
1106 * file, which is now open; its associated file-name changes too.
1107 *
1108 * Note that the original (dir) fid is never currently open,
1109 * so there is nothing to close.
1110 */
1111static int
1112fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
1113    bool isp9, mode_t perm, gid_t egid, struct stat *st)
1114{
1115	struct fs_fid *file;
1116	gid_t gid;
1117	uid_t uid;
1118	char newname[MAXPATHLEN];
1119	int error, fd;
1120
1121	file = dir->lo_aux;
1122
1123	/*
1124	 * Build full path name from directory + file name.  We'll
1125	 * check permissions on the parent directory, then race to
1126	 * create the file before anything bad happens like symlinks.
1127	 *
1128	 * (To close this race we need to use openat(), which is
1129	 * left for a later version of this code.)
1130	 */
1131	error = fs_buildname(dir, name, newname, sizeof(newname));
1132	if (error)
1133		return (error);
1134
1135	/* In case of success, we will need a new file->ff_name. */
1136	name = strdup(newname);
1137	if (name == NULL)
1138		return (ENOMEM);
1139
1140	/* Check create permission and compute new file ownership. */
1141	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1142	if (error) {
1143		free(name);
1144		return (error);
1145	}
1146
1147	/* Adjust new-file permissions for Plan9 protocol. */
1148	if (isp9)
1149		perm = fs_p9perm(perm, st->st_mode, false);
1150
1151	/* Create is always exclusive so O_TRUNC is irrelevant. */
1152	fd = openat(file->ff_dirfd, newname, flags | O_CREAT | O_EXCL, perm);
1153	if (fd < 0) {
1154		error = errno;
1155		free(name);
1156		return (error);
1157	}
1158
1159	/* Fix permissions and owner. */
1160	if (fchmod(fd, perm) != 0 ||
1161	    fchown(fd, uid, gid) != 0 ||
1162	    fstat(fd, st) != 0) {
1163		error = errno;
1164		(void) close(fd);
1165		/* unlink(newname); ? */
1166		free(name);
1167		return (error);
1168	}
1169
1170	/* It *was* a directory; now it's a file, and it's open. */
1171	free(file->ff_name);
1172	file->ff_name = name;
1173	file->ff_fd = fd;
1174	return (0);
1175}
1176
1177/*
1178 * Internal form of open: stat file and verify permissions (from p9
1179 * argument), then open the file-or-directory, leaving the internal
1180 * fs_fid fields set up.  If we cannot open the file, return a
1181 * suitable error number, and leave everything unchanged.
1182 *
1183 * To mitigate the race between permissions testing and the actual
1184 * open, we can stat the file twice (once with lstat() before open,
1185 * then with fstat() after).  We assume O_NOFOLLOW is set in flags,
1186 * so if some other race-winner substitutes in a symlink we won't
1187 * open it here.  (However, embedded symlinks, if they occur, are
1188 * still an issue.  Ideally we would like to have an O_NEVERFOLLOW
1189 * that fails on embedded symlinks, and a way to pass this to
1190 * lstat() as well.)
1191 *
1192 * When we use opendir() we cannot pass O_NOFOLLOW, so we must rely
1193 * on substitution-detection via fstat().  To simplify the code we
1194 * just always re-check.
1195 *
1196 * (For a proper fix in the future, we can require openat(), keep
1197 * each parent directory open during walk etc, and allow only final
1198 * name components with O_NOFOLLOW.)
1199 *
1200 * On successful return, st has been filled in.
1201 */
1202static int
1203fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
1204    gid_t egid __unused, struct stat *st)
1205{
1206	struct fs_softc *sc = softc;
1207	struct fs_fid *file;
1208	struct stat first;
1209	int32_t op;
1210	char *name;
1211	int error;
1212	int fd;
1213	DIR *dirp;
1214
1215	/* Forbid write ops on read-only file system. */
1216	if (sc->fs_readonly) {
1217		if ((flags & O_TRUNC) != 0)
1218			return (EROFS);
1219		if ((flags & O_ACCMODE) != O_RDONLY)
1220			return (EROFS);
1221		if (p9 & L9P_ORCLOSE)
1222			return (EROFS);
1223	}
1224
1225	file = fid->lo_aux;
1226	assert(file != NULL);
1227	name = file->ff_name;
1228
1229	if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
1230		return (errno);
1231	if (S_ISLNK(first.st_mode))
1232		return (EPERM);
1233
1234	/* Can we rely on O_APPEND here?  Best not, can be cleared. */
1235	switch (flags & O_ACCMODE) {
1236	case O_RDONLY:
1237		op = L9P_ACE_READ_DATA;
1238		break;
1239	case O_WRONLY:
1240		op = L9P_ACE_WRITE_DATA;
1241		break;
1242	case O_RDWR:
1243		op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
1244		break;
1245	default:
1246		return (EINVAL);
1247	}
1248	fillacl(file);
1249	error = check_access(op, NULL, NULL, file->ff_acl, &first,
1250	    file->ff_ai, (gid_t)-1);
1251	if (error)
1252		return (error);
1253
1254	if (S_ISDIR(first.st_mode)) {
1255		/* Forbid write or truncate on directory. */
1256		if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
1257			return (EPERM);
1258		fd = openat(file->ff_dirfd, name, O_DIRECTORY);
1259		dirp = fdopendir(fd);
1260		if (dirp == NULL)
1261			return (EPERM);
1262		fd = dirfd(dirp);
1263	} else {
1264		dirp = NULL;
1265		fd = openat(file->ff_dirfd, name, flags);
1266		if (fd < 0)
1267			return (EPERM);
1268	}
1269
1270	/*
1271	 * We have a valid fd, and maybe non-null dirp.  Re-check
1272	 * the file, and fail if st_dev or st_ino changed.
1273	 */
1274	if (fstat(fd, st) != 0 ||
1275	    first.st_dev != st->st_dev ||
1276	    first.st_ino != st->st_ino) {
1277		if (dirp != NULL)
1278			(void) closedir(dirp);
1279		else
1280			(void) close(fd);
1281		return (EPERM);
1282	}
1283	if (dirp != NULL)
1284		file->ff_dir = dirp;
1285	else
1286		file->ff_fd = fd;
1287	return (0);
1288}
1289
1290/*
1291 * Internal form of mkdir (common code for all forms).
1292 * We receive the fs_softc <softc>, the directory fid <dir>
1293 * in which the new entry is to be created, the name of the
1294 * new entry, a flag <isp9> indicating whether to do plan9 style
1295 * permissions or Linux style permissions, the permissions <perm>,
1296 * an effective group id <egid>, and a pointer to a stat structure
1297 * <st> to fill in describing the final result on success.
1298 *
1299 * See also fs_icreate() above.
1300 */
1301static int
1302fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
1303    bool isp9, mode_t perm, gid_t egid, struct stat *st)
1304{
1305	struct fs_fid *ff;
1306	gid_t gid;
1307	uid_t uid;
1308	char newname[MAXPATHLEN];
1309	int error, fd;
1310
1311	ff = dir->lo_aux;
1312	error = fs_buildname(dir, name, newname, sizeof(newname));
1313	if (error)
1314		return (error);
1315
1316	error = fs_nde(softc, dir, true, egid, st, &uid, &gid);
1317	if (error)
1318		return (error);
1319
1320	if (isp9)
1321		perm = fs_p9perm(perm, st->st_mode, true);
1322
1323	if (mkdirat(ff->ff_dirfd, newname, perm) != 0)
1324		return (errno);
1325
1326	fd = openat(ff->ff_dirfd, newname,
1327	    O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
1328	if (fd < 0 ||
1329	    fchown(fd, uid, gid) != 0 ||
1330	    fchmod(fd, perm) != 0 ||
1331	    fstat(fd, st) != 0) {
1332		error = errno;
1333		/* rmdir(newname) ? */
1334	}
1335	if (fd >= 0)
1336		(void) close(fd);
1337
1338	return (error);
1339}
1340
#ifdef __APPLE__
/*
 * Wrapper around an undocumented OS X syscall, used because there
 * doesn't seem to be another safe way to implement mknodat there.
 * It would be best to avoid it.  Dear Apple, please implement mknodat
 * before you remove this syscall.
 *
 * Callers in this file pass a directory descriptor to start resolving
 * relative paths against it, and -1 to stop doing so.  The raw
 * syscall() result is returned unchanged.
 */
static int
fs_ifchdir_thread_local(int fd)
{
	int rc;

	/* The call is flagged as deprecated; silence just that warning. */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	rc = syscall(SYS___pthread_fchdir, fd);
#pragma clang diagnostic pop
	return (rc);
}
#endif
1355
1356/*
1357 * Internal form of mknod (special device).
1358 *
1359 * The device type (S_IFBLK, S_IFCHR) is included in the <mode> parameter.
1360 */
1361static int
1362fs_imknod(void *softc, struct l9p_fid *dir, char *name,
1363    bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st)
1364{
1365	struct fs_fid *ff;
1366	mode_t perm;
1367	gid_t gid;
1368	uid_t uid;
1369	char newname[MAXPATHLEN];
1370	int error;
1371
1372	ff = dir->lo_aux;
1373	error = fs_buildname(dir, name, newname, sizeof(newname));
1374	if (error)
1375		return (error);
1376
1377	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1378	if (error)
1379		return (error);
1380
1381	if (isp9) {
1382		perm = fs_p9perm(mode & 0777, st->st_mode, false);
1383		mode = (mode & ~0777) | perm;
1384	} else {
1385		perm = mode & 0777;
1386	}
1387
1388#ifdef __APPLE__
1389	if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0) {
1390		return -1;
1391	}
1392	error = mknod(newname, mode, dev);
1393	int preserved_errno = errno;
1394	/* Stop using the thread-local cwd */
1395	fs_ifchdir_thread_local(-1);
1396	if (error < 0) {
1397		errno = preserved_errno;
1398		return errno;
1399	}
1400#else
1401	if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0)
1402		return (errno);
1403#endif
1404
1405	/* We cannot open the new name; race to use l* syscalls. */
1406	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1407	    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
1408	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1409		error = errno;
1410	else if ((st->st_mode & S_IFMT) != (mode & S_IFMT))
1411		error = EPERM;		/* ??? lost a race anyway */
1412
1413	/* if (error) unlink(newname) ? */
1414
1415	return (error);
1416}
1417
1418/*
1419 * Internal form of mkfifo.
1420 */
1421static int
1422fs_imkfifo(void *softc, struct l9p_fid *dir, char *name,
1423    bool isp9, mode_t perm, gid_t egid, struct stat *st)
1424{
1425	struct fs_fid *ff;
1426	gid_t gid;
1427	uid_t uid;
1428	char newname[MAXPATHLEN];
1429	int error;
1430
1431	ff = dir->lo_aux;
1432	error = fs_buildname(dir, name, newname, sizeof(newname));
1433	if (error)
1434		return (error);
1435
1436	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1437	if (error)
1438		return (error);
1439
1440	if (isp9)
1441		perm = fs_p9perm(perm, st->st_mode, false);
1442
1443	if (mkfifo(newname, perm) != 0)
1444		return (errno);
1445
1446	/* We cannot open the new name; race to use l* syscalls. */
1447	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1448	    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
1449	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1450		error = errno;
1451	else if (!S_ISFIFO(st->st_mode))
1452		error = EPERM;		/* ??? lost a race anyway */
1453
1454	/* if (error) unlink(newname) ? */
1455
1456	return (error);
1457}
1458
1459/*
1460 * Internal form of mksocket.
1461 *
1462 * This is a bit different because of the horrible socket naming
1463 * system (bind() with sockaddr_un sun_path).
1464 */
1465static int
1466fs_imksocket(void *softc, struct l9p_fid *dir, char *name,
1467    bool isp9, mode_t perm, gid_t egid, struct stat *st)
1468{
1469	struct fs_fid *ff;
1470	struct sockaddr_un sun;
1471	char *path;
1472	char newname[MAXPATHLEN];
1473	gid_t gid;
1474	uid_t uid;
1475	int error = 0, s, fd;
1476
1477	ff = dir->lo_aux;
1478	error = fs_buildname(dir, name, newname, sizeof(newname));
1479	if (error)
1480		return (error);
1481
1482	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1483	if (error)
1484		return (error);
1485
1486	if (isp9)
1487		perm = fs_p9perm(perm, st->st_mode, false);
1488
1489	s = socket(AF_UNIX, SOCK_STREAM, 0);
1490	if (s < 0)
1491		return (errno);
1492
1493	path = newname;
1494	fd = -1;
1495#ifdef HAVE_BINDAT
1496	/* Try bindat() if needed. */
1497	if (strlen(path) >= sizeof(sun.sun_path)) {
1498		fd = openat(ff->ff_dirfd, ff->ff_name,
1499		    O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
1500		if (fd >= 0)
1501			path = name;
1502	}
1503#endif
1504
1505	/*
1506	 * Can only create the socket if the path will fit.
1507	 * Even if we are using bindat() there are limits
1508	 * (the API for AF_UNIX sockets is ... not good).
1509	 *
1510	 * Note: in theory we can fill sun_path to the end
1511	 * (omitting a terminating '\0') but in at least one
1512	 * Unix-like system, this was known to behave oddly,
1513	 * so we test for ">=" rather than just ">".
1514	 */
1515	if (strlen(path) >= sizeof(sun.sun_path)) {
1516		error = ENAMETOOLONG;
1517		goto out;
1518	}
1519	sun.sun_family = AF_UNIX;
1520	sun.sun_len = sizeof(struct sockaddr_un);
1521	strncpy(sun.sun_path, path, sizeof(sun.sun_path));
1522
1523#ifdef HAVE_BINDAT
1524	if (fd >= 0) {
1525		if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0)
1526			error = errno;
1527		goto out;	/* done now, for good or ill */
1528	}
1529#endif
1530
1531	if (bind(s, (struct sockaddr *)&sun, sun.sun_len) < 0)
1532		error = errno;
1533out:
1534
1535	if (error == 0) {
1536		/*
1537		 * We believe we created the socket-inode.  Fix
1538		 * permissions etc.  Note that we cannot use
1539		 * fstat() on the socket descriptor: it succeeds,
1540		 * but we get bogus data!
1541		 */
1542		if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1543		    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
1544		    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1545			error = errno;
1546		else if (!S_ISSOCK(st->st_mode))
1547			error = EPERM;		/* ??? lost a race anyway */
1548
1549		/* if (error) unlink(newname) ? */
1550	}
1551
1552	/*
1553	 * It's not clear which error should override, although
1554	 * ideally we should never see either close() call fail.
1555	 * In any case we do want to try to close both fd and s,
1556	 * always.  Let's set error only if it is not already set,
1557	 * so that all exit paths can use the same code.
1558	 */
1559	if (fd >= 0 && close(fd) != 0)
1560		if (error == 0)
1561			error = errno;
1562	if (close(s) != 0)
1563		if (error == 0)
1564			error = errno;
1565
1566	return (error);
1567}
1568
1569/*
1570 * Internal form of symlink.
1571 *
1572 * Note that symlinks are presumed to carry no permission bits.
1573 * They do have owners, however (who may be charged for quotas).
1574 */
1575static int
1576fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
1577    char *symtgt, gid_t egid, struct stat *st)
1578{
1579	struct fs_fid *ff;
1580	gid_t gid;
1581	uid_t uid;
1582	char newname[MAXPATHLEN];
1583	int error;
1584
1585	ff = dir->lo_aux;
1586	error = fs_buildname(dir, name, newname, sizeof(newname));
1587	if (error)
1588		return (error);
1589
1590	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1591	if (error)
1592		return (error);
1593
1594	if (symlinkat(symtgt, ff->ff_dirfd, newname) != 0)
1595		return (errno);
1596
1597	/* We cannot open the new name; race to use l* syscalls. */
1598	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1599	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1600		error = errno;
1601	else if (!S_ISLNK(st->st_mode))
1602		error = EPERM;		/* ??? lost a race anyway */
1603
1604	/* if (error) unlink(newname) ? */
1605
1606	return (error);
1607}
1608
1609static int
1610fs_open(void *softc, struct l9p_request *req)
1611{
1612	struct l9p_fid *fid = req->lr_fid;
1613	struct stat st;
1614	enum l9p_omode p9;
1615	int error, flags;
1616
1617	p9 = req->lr_req.topen.mode;
1618	error = fs_oflags_dotu(p9, &flags);
1619	if (error)
1620		return (error);
1621
1622	error = fs_iopen(softc, fid, flags, p9, (gid_t)-1, &st);
1623	if (error)
1624		return (error);
1625
1626	generate_qid(&st, &req->lr_resp.ropen.qid);
1627	req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
1628	return (0);
1629}
1630
1631/*
1632 * Helper for directory read.  We want to run an lstat on each
1633 * file name within the directory.  This is a lot faster if we
1634 * have lstatat (or fstatat with AT_SYMLINK_NOFOLLOW), but not
1635 * all systems do, so hide the ifdef-ed code in an inline function.
1636 */
1637static inline int
1638fs_lstatat(struct fs_fid *file, char *name, struct stat *st)
1639{
1640
1641	return (fstatat(dirfd(file->ff_dir), name, st, AT_SYMLINK_NOFOLLOW));
1642}
1643
1644static int
1645fs_read(void *softc, struct l9p_request *req)
1646{
1647	struct l9p_stat l9stat;
1648	struct fs_softc *sc;
1649	struct fs_fid *file;
1650	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
1651	ssize_t ret;
1652
1653	sc = softc;
1654	file = req->lr_fid->lo_aux;
1655	assert(file != NULL);
1656
1657	if (file->ff_dir != NULL) {
1658		struct dirent *d;
1659		struct stat st;
1660		struct l9p_message msg;
1661		long o;
1662
1663		pthread_mutex_lock(&file->ff_mtx);
1664
1665		/*
1666		 * Must use telldir before readdir since seekdir
1667		 * takes cookie values.  Unfortunately this wastes
1668		 * a lot of time (and memory) building unneeded
1669		 * cookies that can only be flushed by closing
1670		 * the directory.
1671		 *
1672		 * NB: FreeBSD libc seekdir has SINGLEUSE defined,
1673		 * so in fact, we can discard the cookies by
1674		 * calling seekdir on them.  This clears up wasted
1675		 * memory at the cost of even more wasted time...
1676		 *
1677		 * XXX: readdir/telldir/seekdir not thread safe
1678		 */
1679		l9p_init_msg(&msg, req, L9P_PACK);
1680		for (;;) {
1681			o = telldir(file->ff_dir);
1682			d = readdir(file->ff_dir);
1683			if (d == NULL)
1684				break;
1685			if (fs_lstatat(file, d->d_name, &st))
1686				continue;
1687			dostat(sc, &l9stat, d->d_name, &st, dotu);
1688			if (l9p_pack_stat(&msg, req, &l9stat) != 0) {
1689				seekdir(file->ff_dir, o);
1690				break;
1691			}
1692#if defined(__FreeBSD__)
1693			seekdir(file->ff_dir, o);
1694			(void) readdir(file->ff_dir);
1695#endif
1696		}
1697
1698		pthread_mutex_unlock(&file->ff_mtx);
1699	} else {
1700		size_t niov = l9p_truncate_iov(req->lr_data_iov,
1701                    req->lr_data_niov, req->lr_req.io.count);
1702
1703#if defined(__FreeBSD__)
1704		ret = preadv(file->ff_fd, req->lr_data_iov, niov,
1705		    req->lr_req.io.offset);
1706#else
1707		/* XXX: not thread safe, should really use aio_listio. */
1708		if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
1709			return (errno);
1710
1711		ret = (uint32_t)readv(file->ff_fd, req->lr_data_iov, (int)niov);
1712#endif
1713
1714		if (ret < 0)
1715			return (errno);
1716
1717		req->lr_resp.io.count = (uint32_t)ret;
1718	}
1719
1720	return (0);
1721}
1722
1723static int
1724fs_remove(void *softc, struct l9p_fid *fid)
1725{
1726	struct fs_softc *sc = softc;
1727	struct l9p_acl *parent_acl;
1728	struct fs_fid *file;
1729	struct stat pst, cst;
1730	char dirname[MAXPATHLEN];
1731	int error;
1732
1733	if (sc->fs_readonly)
1734		return (EROFS);
1735
1736	error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst);
1737	if (error)
1738		return (error);
1739
1740	file = fid->lo_aux;
1741	if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0)
1742		return (error);
1743
1744	parent_acl = getacl(file, -1, dirname);
1745	fillacl(file);
1746
1747	error = check_access(L9P_ACOP_UNLINK,
1748	    parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1);
1749	l9p_acl_free(parent_acl);
1750	if (error)
1751		return (error);
1752
1753	if (unlinkat(file->ff_dirfd, file->ff_name,
1754	    S_ISDIR(cst.st_mode) ? AT_REMOVEDIR : 0) != 0)
1755		error = errno;
1756
1757	return (error);
1758}
1759
1760static int
1761fs_stat(void *softc, struct l9p_request *req)
1762{
1763	struct fs_softc *sc;
1764	struct fs_fid *file;
1765	struct stat st;
1766	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
1767
1768	sc = softc;
1769	file = req->lr_fid->lo_aux;
1770	assert(file);
1771
1772	if (fstatat(file->ff_dirfd, file->ff_name, &st,
1773	    AT_SYMLINK_NOFOLLOW) != 0)
1774		return (errno);
1775
1776	dostat(sc, &req->lr_resp.rstat.stat, file->ff_name, &st, dotu);
1777	return (0);
1778}
1779
1780static int
1781fs_walk(void *softc, struct l9p_request *req)
1782{
1783	struct l9p_acl *acl;
1784	struct fs_authinfo *ai;
1785	struct fs_fid *file = req->lr_fid->lo_aux;
1786	struct fs_fid *newfile;
1787	struct stat st;
1788	size_t clen, namelen, need;
1789	char *comp, *succ, *next, *swtmp;
1790	bool atroot;
1791	bool dotdot;
1792	int i, nwname;
1793	int error = 0;
1794	char namebufs[2][MAXPATHLEN];
1795
1796	/*
1797	 * https://swtch.com/plan9port/man/man9/walk.html:
1798	 *
1799	 *    It is legal for nwname to be zero, in which case newfid
1800	 *    will represent the same file as fid and the walk will
1801	 *    usually succeed; this is equivalent to walking to dot.
1802	 * [Aside: it's not clear if we should test S_ISDIR here.]
1803	 *    ...
1804	 *    The name ".." ... represents the parent directory.
1805	 *    The name "." ... is not used in the protocol.
1806	 *    ... A walk of the name ".." in the root directory
1807	 *    of the server is equivalent to a walk with no name
1808	 *    elements.
1809	 *
1810	 * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM,
1811	 * so it is safe to convert to plain int.
1812	 *
1813	 * We are to return an error only if the first walk fails,
1814	 * else stop at the end of the names or on the first error.
1815	 * The final fid is based on the last name successfully
1816	 * walked.
1817	 *
1818	 * Note that we *do* get Twalk requests with nwname==0 on files.
1819	 *
1820	 * Set up "successful name" buffer pointer with base fid name,
1821	 * initially.  We'll swap each new success into it as we go.
1822	 *
1823	 * Invariant: atroot and stat data correspond to current
1824	 * (succ) path.
1825	 */
1826	succ = namebufs[0];
1827	next = namebufs[1];
1828	namelen = strlcpy(succ, file->ff_name, MAXPATHLEN);
1829	if (namelen >= MAXPATHLEN)
1830		return (ENAMETOOLONG);
1831	if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0)
1832		return (errno);
1833	ai = file->ff_ai;
1834	atroot = strlen(succ) == 0; /* XXX? */
1835	fillacl(file);
1836	acl = file->ff_acl;
1837
1838	nwname = (int)req->lr_req.twalk.nwname;
1839
1840	for (i = 0; i < nwname; i++) {
1841		/*
1842		 * Must have execute permission to search a directory.
1843		 * Then, look up each component in its directory-so-far.
1844		 * Check for ".." along the way, handlng specially
1845		 * as needed.  Forbid "/" in name components.
1846		 *
1847		 */
1848		if (!S_ISDIR(st.st_mode)) {
1849			error = ENOTDIR;
1850			goto out;
1851		}
1852		error = check_access(L9P_ACE_EXECUTE,
1853		     NULL, NULL, acl, &st, ai, (gid_t)-1);
1854		if (error) {
1855			L9P_LOG(L9P_DEBUG,
1856			    "Twalk: denying dir-walk on \"%s\" for uid %u",
1857			    succ, (unsigned)ai->ai_uid);
1858			error = EPERM;
1859			goto out;
1860		}
1861		comp = req->lr_req.twalk.wname[i];
1862		if (strchr(comp, '/') != NULL) {
1863			error = EINVAL;
1864			break;
1865		}
1866
1867		clen = strlen(comp);
1868		dotdot = false;
1869
1870		/*
1871		 * Build next pathname (into "next").  If "..",
1872		 * just strip one name component off the success
1873		 * name so far.  Since we know this name fits, the
1874		 * stripped down version also fits.  Otherwise,
1875		 * the name is the base name plus '/' plus the
1876		 * component name plus terminating '\0'; this may
1877		 * or may not fit.
1878		 */
1879		if (comp[0] == '.') {
1880			if (clen == 1) {
1881				error = EINVAL;
1882				break;
1883			}
1884			if (comp[1] == '.' && clen == 2)
1885				dotdot = true;
1886		}
1887		if (dotdot) {
1888			/*
1889			 * It's not clear how ".." at root should
1890			 * be handled when i > 0.  Obeying the man
1891			 * page exactly, we reset i to 0 and stop,
1892			 * declaring terminal success.
1893			 *
1894			 * Otherwise, we just climbed up one level
1895			 * so adjust "atroot".
1896			 */
1897			if (atroot) {
1898				i = 0;
1899				break;
1900			}
1901			(void) r_dirname(succ, next, MAXPATHLEN);
1902			namelen = strlen(next);
1903			atroot = strlen(next) == 0; /* XXX? */
1904		} else {
1905			need = namelen + 1 + clen + 1;
1906			if (need > MAXPATHLEN) {
1907				error = ENAMETOOLONG;
1908				break;
1909			}
1910			memcpy(next, succ, namelen);
1911			next[namelen++] = '/';
1912			memcpy(&next[namelen], comp, clen + 1);
1913			namelen += clen;
1914			/*
1915			 * Since name is never ".", we are necessarily
1916			 * descending below the root now.
1917			 */
1918			atroot = false;
1919		}
1920
1921		if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) {
1922			error = ENOENT;
1923			break;
1924		}
1925
1926		/*
1927		 * Success: generate qid and swap this
1928		 * successful name into place.  Update acl.
1929		 */
1930		generate_qid(&st, &req->lr_resp.rwalk.wqid[i]);
1931		swtmp = succ;
1932		succ = next;
1933		next = swtmp;
1934		if (acl != NULL && acl != file->ff_acl)
1935			l9p_acl_free(acl);
1936		acl = getacl(file, -1, next);
1937	}
1938
1939	/*
1940	 * Fail only if we failed on the first name.
1941	 * Otherwise we succeeded on something, and "succ"
1942	 * points to the last successful name in namebufs[].
1943	 */
1944	if (error) {
1945		if (i == 0)
1946			goto out;
1947		error = 0;
1948	}
1949
1950	newfile = open_fid(file->ff_dirfd, succ, ai, false);
1951	if (newfile == NULL) {
1952		error = ENOMEM;
1953		goto out;
1954	}
1955	if (req->lr_newfid == req->lr_fid) {
1956		/*
1957		 * Before overwriting fid->lo_aux, free the old value.
1958		 * Note that this doesn't free the l9p_fid data,
1959		 * just the fs_fid data.  (But it does ditch ff_acl.)
1960		 */
1961		if (acl == file->ff_acl)
1962			acl = NULL;
1963		fs_freefid(softc, req->lr_fid);
1964		file = NULL;
1965	}
1966	req->lr_newfid->lo_aux = newfile;
1967	if (file != NULL && acl != file->ff_acl) {
1968		newfile->ff_acl = acl;
1969		acl = NULL;
1970	}
1971	req->lr_resp.rwalk.nwqid = (uint16_t)i;
1972out:
1973	if (file != NULL && acl != file->ff_acl)
1974		l9p_acl_free(acl);
1975	return (error);
1976}
1977
1978static int
1979fs_write(void *softc, struct l9p_request *req)
1980{
1981	struct fs_softc *sc = softc;
1982	struct fs_fid *file;
1983	ssize_t ret;
1984
1985	file = req->lr_fid->lo_aux;
1986	assert(file != NULL);
1987
1988	if (sc->fs_readonly)
1989		return (EROFS);
1990
1991	size_t niov = l9p_truncate_iov(req->lr_data_iov,
1992            req->lr_data_niov, req->lr_req.io.count);
1993
1994#if defined(__FreeBSD__)
1995	ret = pwritev(file->ff_fd, req->lr_data_iov, niov,
1996	    req->lr_req.io.offset);
1997#else
1998	/* XXX: not thread safe, should really use aio_listio. */
1999	if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
2000		return (errno);
2001
2002	ret = writev(file->ff_fd, req->lr_data_iov,
2003	    (int)niov);
2004#endif
2005
2006	if (ret < 0)
2007		return (errno);
2008
2009	req->lr_resp.io.count = (uint32_t)ret;
2010	return (0);
2011}
2012
2013static int
2014fs_wstat(void *softc, struct l9p_request *req)
2015{
2016	struct fs_softc *sc = softc;
2017	struct l9p_stat *l9stat = &req->lr_req.twstat.stat;
2018	struct l9p_fid *fid;
2019	struct fs_fid *file;
2020	int error = 0;
2021
2022	fid = req->lr_fid;
2023	file = fid->lo_aux;
2024	assert(file != NULL);
2025
2026	/*
2027	 * XXX:
2028	 *
2029	 * stat(9P) sez:
2030	 *
2031	 * Either all the changes in wstat request happen, or none of them
2032	 * does: if the request succeeds, all changes were made; if it fails,
2033	 * none were.
2034	 *
2035	 * Atomicity is clearly missing in current implementation.
2036	 */
2037
2038	if (sc->fs_readonly)
2039		return (EROFS);
2040
2041	if (l9stat->atime != (uint32_t)~0) {
2042		/* XXX: not implemented, ignore */
2043	}
2044
2045	if (l9stat->mtime != (uint32_t)~0) {
2046		/* XXX: not implemented, ignore */
2047	}
2048
2049	if (l9stat->dev != (uint32_t)~0) {
2050		error = EPERM;
2051		goto out;
2052	}
2053
2054	if (l9stat->length != (uint64_t)~0) {
2055		if (file->ff_dir != NULL) {
2056			error = EINVAL;
2057			goto out;
2058		}
2059
2060		if (truncate(file->ff_name, (off_t)l9stat->length) != 0) {
2061			error = errno;
2062			goto out;
2063		}
2064	}
2065
2066	if (req->lr_conn->lc_version >= L9P_2000U) {
2067		if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid,
2068		    l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) {
2069			error = errno;
2070			goto out;
2071		}
2072	}
2073
2074	if (l9stat->mode != (uint32_t)~0) {
2075		if (fchmodat(file->ff_dirfd, file->ff_name,
2076		    l9stat->mode & 0777, 0) != 0) {
2077			error = errno;
2078			goto out;
2079		}
2080	}
2081
2082	if (strlen(l9stat->name) > 0) {
2083		struct l9p_acl *parent_acl;
2084		struct stat st;
2085		char *tmp;
2086		char newname[MAXPATHLEN];
2087
2088		/*
2089		 * Rename-within-directory: it's not deleting anything,
2090		 * but we need write permission on the directory.  This
2091		 * should suffice.
2092		 */
2093		error = fs_pdir(softc, fid, newname, sizeof(newname), &st);
2094		if (error)
2095			goto out;
2096		parent_acl = getacl(file, -1, newname);
2097		error = check_access(L9P_ACE_ADD_FILE,
2098		    parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1);
2099		l9p_acl_free(parent_acl);
2100		if (error)
2101			goto out;
2102		error = fs_dpf(newname, l9stat->name, sizeof(newname));
2103		if (error)
2104			goto out;
2105		tmp = strdup(newname);
2106		if (tmp == NULL) {
2107			error = ENOMEM;
2108			goto out;
2109		}
2110		if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
2111		    tmp) != 0) {
2112			error = errno;
2113			free(tmp);
2114			goto out;
2115		}
2116		/* Successful rename, update file->ff_name.  ACL can stay. */
2117		free(file->ff_name);
2118		file->ff_name = tmp;
2119	}
2120out:
2121	return (error);
2122}
2123
2124static int
2125fs_statfs(void *softc __unused, struct l9p_request *req)
2126{
2127	struct fs_fid *file;
2128	struct stat st;
2129	struct statfs f;
2130	long name_max;
2131	int error;
2132	int fd;
2133
2134	file = req->lr_fid->lo_aux;
2135	assert(file);
2136
2137	if (fstatat(file->ff_dirfd, file->ff_name, &st,
2138	    AT_SYMLINK_NOFOLLOW) != 0)
2139		return (errno);
2140
2141	/*
2142	 * Not entirely clear what access to require; we'll go
2143	 * for "read data".
2144	 */
2145	fillacl(file);
2146	error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
2147	    file->ff_acl, &st, file->ff_ai, (gid_t)-1);
2148	if (error)
2149		return (error);
2150
2151	fd = openat(file->ff_dirfd, file->ff_name, 0);
2152	if (fd < 0)
2153		return (errno);
2154
2155	if (fstatfs(fd, &f) != 0)
2156		return (errno);
2157
2158	name_max = fpathconf(fd, _PC_NAME_MAX);
2159	error = errno;
2160	close(fd);
2161
2162	if (name_max == -1)
2163		return (error);
2164
2165	dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
2166
2167	return (0);
2168}
2169
2170static int
2171fs_lopen(void *softc, struct l9p_request *req)
2172{
2173	struct l9p_fid *fid = req->lr_fid;
2174	struct stat st;
2175	enum l9p_omode p9;
2176	gid_t gid;
2177	int error, flags;
2178
2179	error = fs_oflags_dotl(req->lr_req.tlopen.flags, &flags, &p9);
2180	if (error)
2181		return (error);
2182
2183	gid = req->lr_req.tlopen.gid;
2184	error = fs_iopen(softc, fid, flags, p9, gid, &st);
2185	if (error)
2186		return (error);
2187
2188	generate_qid(&st, &req->lr_resp.rlopen.qid);
2189	req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
2190	return (0);
2191}
2192
2193static int
2194fs_lcreate(void *softc, struct l9p_request *req)
2195{
2196	struct l9p_fid *dir;
2197	struct stat st;
2198	enum l9p_omode p9;
2199	char *name;
2200	mode_t perm;
2201	gid_t gid;
2202	int error, flags;
2203
2204	dir = req->lr_fid;
2205	name = req->lr_req.tlcreate.name;
2206
2207	error = fs_oflags_dotl(req->lr_req.tlcreate.flags, &flags, &p9);
2208	if (error)
2209		return (error);
2210
2211	perm = (mode_t)req->lr_req.tlcreate.mode & 0777; /* ? set-id bits? */
2212	gid = req->lr_req.tlcreate.gid;
2213	error = fs_icreate(softc, dir, name, flags, false, perm, gid, &st);
2214	if (error == 0)
2215		generate_qid(&st, &req->lr_resp.rlcreate.qid);
2216	req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
2217	return (error);
2218}
2219
2220static int
2221fs_symlink(void *softc, struct l9p_request *req)
2222{
2223	struct l9p_fid *dir;
2224	struct stat st;
2225	gid_t gid;
2226	char *name, *symtgt;
2227	int error;
2228
2229	dir = req->lr_fid;
2230	name = req->lr_req.tsymlink.name;
2231	symtgt = req->lr_req.tsymlink.symtgt;
2232	gid = req->lr_req.tsymlink.gid;
2233	error = fs_isymlink(softc, dir, name, symtgt, gid, &st);
2234	if (error == 0)
2235		generate_qid(&st, &req->lr_resp.rsymlink.qid);
2236	return (error);
2237}
2238
2239static int
2240fs_mknod(void *softc, struct l9p_request *req)
2241{
2242	struct l9p_fid *dir;
2243	struct stat st;
2244	uint32_t mode, major, minor;
2245	dev_t dev;
2246	gid_t gid;
2247	char *name;
2248	int error;
2249
2250	dir = req->lr_fid;
2251	name = req->lr_req.tmknod.name;
2252	mode = req->lr_req.tmknod.mode;
2253	gid = req->lr_req.tmknod.gid;
2254
2255	switch (mode & S_IFMT) {
2256	case S_IFBLK:
2257	case S_IFCHR:
2258		mode = (mode & S_IFMT) | (mode & 0777);	/* ??? */
2259		major = req->lr_req.tmknod.major;
2260		minor = req->lr_req.tmknod.major;
2261		dev = makedev(major, minor);
2262		error = fs_imknod(softc, dir, name, false,
2263		    (mode_t)mode, dev, gid, &st);
2264		break;
2265
2266	case S_IFIFO:
2267		error = fs_imkfifo(softc, dir, name, false,
2268		    (mode_t)(mode & 0777), gid, &st);
2269		break;
2270
2271	case S_IFSOCK:
2272		error = fs_imksocket(softc, dir, name, false,
2273		    (mode_t)(mode & 0777), gid, &st);
2274		break;
2275
2276	default:
2277		error = EINVAL;
2278		break;
2279	}
2280	if (error == 0)
2281		generate_qid(&st, &req->lr_resp.rmknod.qid);
2282	return (error);
2283}
2284
2285static int
2286fs_rename(void *softc, struct l9p_request *req)
2287{
2288	struct fs_softc *sc = softc;
2289	struct fs_authinfo *ai;
2290	struct l9p_acl *oparent_acl;
2291	struct l9p_fid *fid, *f2;
2292	struct fs_fid *file, *f2ff;
2293	struct stat cst, opst, npst;
2294	int32_t op;
2295	bool reparenting;
2296	char *tmp;
2297	char olddir[MAXPATHLEN], newname[MAXPATHLEN];
2298	int error;
2299
2300	if (sc->fs_readonly)
2301		return (EROFS);
2302
2303	/*
2304	 * Note: lr_fid represents the file that is to be renamed,
2305	 * so we must locate its parent directory and verify that
2306	 * both this parent directory and the new directory f2 are
2307	 * writable.  But if the new parent directory is the same
2308	 * path as the old parent directory, our job is simpler.
2309	 */
2310	fid = req->lr_fid;
2311	file = fid->lo_aux;
2312	assert(file != NULL);
2313	ai = file->ff_ai;
2314
2315	error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst);
2316	if (error)
2317		return (error);
2318
2319	f2 = req->lr_fid2;
2320	f2ff = f2->lo_aux;
2321	assert(f2ff != NULL);
2322
2323	reparenting = strcmp(olddir, f2ff->ff_name) != 0;
2324
2325	fillacl(file);
2326	fillacl(f2ff);
2327
2328	if (fstatat(file->ff_dirfd, file->ff_name, &cst,
2329	    AT_SYMLINK_NOFOLLOW) != 0)
2330		return (errno);
2331
2332	/*
2333	 * Are we moving from olddir?  If so, we're unlinking
2334	 * from it, in terms of ACL access.
2335	 */
2336	if (reparenting) {
2337		oparent_acl = getacl(file, -1, olddir);
2338		error = check_access(L9P_ACOP_UNLINK,
2339		    oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1);
2340		l9p_acl_free(oparent_acl);
2341		if (error)
2342			return (error);
2343	}
2344
2345	/*
2346	 * Now check that we're allowed to "create" a file or directory in
2347	 * f2.  (Should we do this, too, only if reparenting?  Maybe check
2348	 * for dir write permission if not reparenting -- but that's just
2349	 * add-file/add-subdir, which means doing this always.)
2350	 */
2351	if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst,
2352	    AT_SYMLINK_NOFOLLOW) != 0)
2353		return (errno);
2354
2355	op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
2356	error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL,
2357	    ai, (gid_t)-1);
2358	if (error)
2359		return (error);
2360
2361	/*
2362	 * Directories OK, file systems not R/O, etc; build final name.
2363	 * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general
2364	 * paranoia, let's double check anyway.
2365	 */
2366	if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname))
2367		return (ENAMETOOLONG);
2368	error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname));
2369	if (error)
2370		return (error);
2371	tmp = strdup(newname);
2372	if (tmp == NULL)
2373		return (ENOMEM);
2374
2375	if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) {
2376		error = errno;
2377		free(tmp);
2378		return (error);
2379	}
2380
2381	/* file has been renamed but old fid is not clunked */
2382	free(file->ff_name);
2383	file->ff_name = tmp;
2384
2385	dropacl(file);
2386	return (0);
2387}
2388
2389static int
2390fs_readlink(void *softc __unused, struct l9p_request *req)
2391{
2392	struct fs_fid *file;
2393	ssize_t linklen;
2394	char buf[MAXPATHLEN];
2395	int error = 0;
2396
2397	file = req->lr_fid->lo_aux;
2398	assert(file);
2399
2400	linklen = readlinkat(file->ff_dirfd, file->ff_name, buf, sizeof(buf));
2401	if (linklen < 0)
2402		error = errno;
2403	else if ((size_t)linklen >= sizeof(buf))
2404		error = ENOMEM; /* todo: allocate dynamically */
2405	else if ((req->lr_resp.rreadlink.target = strndup(buf,
2406	    (size_t)linklen)) == NULL)
2407		error = ENOMEM;
2408	return (error);
2409}
2410
2411static int
2412fs_getattr(void *softc __unused, struct l9p_request *req)
2413{
2414	uint64_t mask, valid;
2415	struct fs_fid *file;
2416	struct stat st;
2417	int error = 0;
2418
2419	file = req->lr_fid->lo_aux;
2420	assert(file);
2421
2422	valid = 0;
2423	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
2424		error = errno;
2425		goto out;
2426	}
2427	/* ?? Can we provide items not-requested? If so, can skip tests. */
2428	mask = req->lr_req.tgetattr.request_mask;
2429	if (mask & L9PL_GETATTR_MODE) {
2430		/* It is not clear if we need any translations. */
2431		req->lr_resp.rgetattr.mode = st.st_mode;
2432		valid |= L9PL_GETATTR_MODE;
2433	}
2434	if (mask & L9PL_GETATTR_NLINK) {
2435		req->lr_resp.rgetattr.nlink = st.st_nlink;
2436		valid |= L9PL_GETATTR_NLINK;
2437	}
2438	if (mask & L9PL_GETATTR_UID) {
2439		/* provide st_uid, or file->ff_uid? */
2440		req->lr_resp.rgetattr.uid = st.st_uid;
2441		valid |= L9PL_GETATTR_UID;
2442	}
2443	if (mask & L9PL_GETATTR_GID) {
2444		/* provide st_gid, or file->ff_gid? */
2445		req->lr_resp.rgetattr.gid = st.st_gid;
2446		valid |= L9PL_GETATTR_GID;
2447	}
2448	if (mask & L9PL_GETATTR_RDEV) {
2449		/* It is not clear if we need any translations. */
2450		req->lr_resp.rgetattr.rdev = (uint64_t)st.st_rdev;
2451		valid |= L9PL_GETATTR_RDEV;
2452	}
2453	if (mask & L9PL_GETATTR_ATIME) {
2454		req->lr_resp.rgetattr.atime_sec =
2455		    (uint64_t)st.st_atimespec.tv_sec;
2456		req->lr_resp.rgetattr.atime_nsec =
2457		    (uint64_t)st.st_atimespec.tv_nsec;
2458		valid |= L9PL_GETATTR_ATIME;
2459	}
2460	if (mask & L9PL_GETATTR_MTIME) {
2461		req->lr_resp.rgetattr.mtime_sec =
2462		    (uint64_t)st.st_mtimespec.tv_sec;
2463		req->lr_resp.rgetattr.mtime_nsec =
2464		    (uint64_t)st.st_mtimespec.tv_nsec;
2465		valid |= L9PL_GETATTR_MTIME;
2466	}
2467	if (mask & L9PL_GETATTR_CTIME) {
2468		req->lr_resp.rgetattr.ctime_sec =
2469		    (uint64_t)st.st_ctimespec.tv_sec;
2470		req->lr_resp.rgetattr.ctime_nsec =
2471		    (uint64_t)st.st_ctimespec.tv_nsec;
2472		valid |= L9PL_GETATTR_CTIME;
2473	}
2474	if (mask & L9PL_GETATTR_BTIME) {
2475#if defined(HAVE_BIRTHTIME)
2476		req->lr_resp.rgetattr.btime_sec =
2477		    (uint64_t)st.st_birthtim.tv_sec;
2478		req->lr_resp.rgetattr.btime_nsec =
2479		    (uint64_t)st.st_birthtim.tv_nsec;
2480#else
2481		req->lr_resp.rgetattr.btime_sec = 0;
2482		req->lr_resp.rgetattr.btime_nsec = 0;
2483#endif
2484		valid |= L9PL_GETATTR_BTIME;
2485	}
2486	if (mask & L9PL_GETATTR_INO)
2487		valid |= L9PL_GETATTR_INO;
2488	if (mask & L9PL_GETATTR_SIZE) {
2489		req->lr_resp.rgetattr.size = (uint64_t)st.st_size;
2490		valid |= L9PL_GETATTR_SIZE;
2491	}
2492	if (mask & L9PL_GETATTR_BLOCKS) {
2493		req->lr_resp.rgetattr.blksize = (uint64_t)st.st_blksize;
2494		req->lr_resp.rgetattr.blocks = (uint64_t)st.st_blocks;
2495		valid |= L9PL_GETATTR_BLOCKS;
2496	}
2497	if (mask & L9PL_GETATTR_GEN) {
2498		req->lr_resp.rgetattr.gen = st.st_gen;
2499		valid |= L9PL_GETATTR_GEN;
2500	}
2501	/* don't know what to do with data version yet */
2502
2503	generate_qid(&st, &req->lr_resp.rgetattr.qid);
2504out:
2505	req->lr_resp.rgetattr.valid = valid;
2506	return (error);
2507}
2508
2509/*
2510 * Should combine some of this with wstat code.
2511 */
2512static int
2513fs_setattr(void *softc, struct l9p_request *req)
2514{
2515	uint64_t mask;
2516	struct fs_softc *sc = softc;
2517	struct timespec ts[2];
2518	struct fs_fid *file;
2519	struct stat st;
2520	int error = 0;
2521	uid_t uid, gid;
2522
2523	file = req->lr_fid->lo_aux;
2524	assert(file);
2525
2526	if (sc->fs_readonly)
2527		return (EROFS);
2528
2529	/*
2530	 * As with WSTAT we have atomicity issues.
2531	 */
2532	mask = req->lr_req.tsetattr.valid;
2533
2534	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
2535		error = errno;
2536		goto out;
2537	}
2538
2539	if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
2540		error = EISDIR;
2541		goto out;
2542	}
2543
2544	if (mask & L9PL_SETATTR_MODE) {
2545		if (fchmodat(file->ff_dirfd, file->ff_name,
2546		    req->lr_req.tsetattr.mode & 0777,
2547		    AT_SYMLINK_NOFOLLOW)) {
2548			error = errno;
2549			goto out;
2550		}
2551	}
2552
2553	if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
2554		uid = mask & L9PL_SETATTR_UID
2555		    ? req->lr_req.tsetattr.uid
2556		    : (uid_t)-1;
2557
2558		gid = mask & L9PL_SETATTR_GID
2559		    ? req->lr_req.tsetattr.gid
2560		    : (gid_t)-1;
2561
2562		if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
2563		    AT_SYMLINK_NOFOLLOW)) {
2564			error = errno;
2565			goto out;
2566		}
2567	}
2568
2569	if (mask & L9PL_SETATTR_SIZE) {
2570		/* Truncate follows symlinks, is this OK? */
2571		int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
2572		if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) {
2573			error = errno;
2574			(void) close(fd);
2575			goto out;
2576		}
2577		(void) close(fd);
2578	}
2579
2580	if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
2581		ts[0].tv_sec = st.st_atimespec.tv_sec;
2582		ts[0].tv_nsec = st.st_atimespec.tv_nsec;
2583		ts[1].tv_sec = st.st_mtimespec.tv_sec;
2584		ts[1].tv_nsec = st.st_mtimespec.tv_nsec;
2585
2586		if (mask & L9PL_SETATTR_ATIME) {
2587			if (mask & L9PL_SETATTR_ATIME_SET) {
2588				ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
2589				ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
2590			} else {
2591				if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
2592					error = errno;
2593					goto out;
2594				}
2595			}
2596		}
2597
2598		if (mask & L9PL_SETATTR_MTIME) {
2599			if (mask & L9PL_SETATTR_MTIME_SET) {
2600				ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
2601				ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
2602			} else {
2603				if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
2604					error = errno;
2605					goto out;
2606				}
2607			}
2608		}
2609
2610		if (utimensat(file->ff_dirfd, file->ff_name, ts,
2611		    AT_SYMLINK_NOFOLLOW)) {
2612			error = errno;
2613			goto out;
2614		}
2615	}
2616out:
2617	return (error);
2618}
2619
2620static int
2621fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
2622{
2623	return (EOPNOTSUPP);
2624}
2625
2626static int
2627fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
2628{
2629	return (EOPNOTSUPP);
2630}
2631
2632static int
2633fs_readdir(void *softc __unused, struct l9p_request *req)
2634{
2635	struct l9p_message msg;
2636	struct l9p_dirent de;
2637	struct fs_fid *file;
2638	struct dirent *dp;
2639	struct stat st;
2640	uint32_t count;
2641	int error = 0;
2642
2643	file = req->lr_fid->lo_aux;
2644	assert(file);
2645
2646	if (file->ff_dir == NULL)
2647		return (ENOTDIR);
2648
2649	pthread_mutex_lock(&file->ff_mtx);
2650
2651	/*
2652	 * It's not clear whether we can use the same trick for
2653	 * discarding offsets here as we do in fs_read.  It
2654	 * probably should work, we'll have to see if some
2655	 * client(s) use the zero-offset thing to rescan without
2656	 * clunking the directory first.
2657	 *
2658	 * Probably the thing to do is switch to calling
2659	 * getdirentries() / getdents() directly, instead of
2660	 * going through libc.
2661	 */
2662	if (req->lr_req.io.offset == 0)
2663		rewinddir(file->ff_dir);
2664	else
2665		seekdir(file->ff_dir, (long)req->lr_req.io.offset);
2666
2667	l9p_init_msg(&msg, req, L9P_PACK);
2668	count = (uint32_t)msg.lm_size; /* in case we get no entries */
2669	while ((dp = readdir(file->ff_dir)) != NULL) {
2670		/*
2671		 * Although "." is forbidden in naming and ".." is
2672		 * special cased, testing shows that we must transmit
2673		 * them through readdir.  (For ".." at root, we
2674		 * should perhaps alter the inode number, but not
2675		 * yet.)
2676		 */
2677
2678		/*
2679		 * TODO: we do a full lstat here; could use dp->d_*
2680		 * to construct the qid more efficiently, as long
2681		 * as dp->d_type != DT_UNKNOWN.
2682		 */
2683		if (fs_lstatat(file, dp->d_name, &st))
2684			continue;
2685
2686		de.qid.type = 0;
2687		generate_qid(&st, &de.qid);
2688		de.offset = (uint64_t)telldir(file->ff_dir);
2689		de.type = dp->d_type;
2690		de.name = dp->d_name;
2691
2692		/* Update count only if we completely pack the dirent. */
2693		if (l9p_pudirent(&msg, &de) < 0)
2694			break;
2695		count = (uint32_t)msg.lm_size;
2696	}
2697
2698	pthread_mutex_unlock(&file->ff_mtx);
2699	req->lr_resp.io.count = count;
2700	return (error);
2701}
2702
2703static int
2704fs_fsync(void *softc __unused, struct l9p_request *req)
2705{
2706	struct fs_fid *file;
2707	int error = 0;
2708
2709	file = req->lr_fid->lo_aux;
2710	assert(file);
2711	if (fsync(file->ff_dir != NULL ? dirfd(file->ff_dir) : file->ff_fd))
2712		error = errno;
2713	return (error);
2714}
2715
2716static int
2717fs_lock(void *softc __unused, struct l9p_request *req)
2718{
2719
2720	switch (req->lr_req.tlock.type) {
2721	case L9PL_LOCK_TYPE_RDLOCK:
2722	case L9PL_LOCK_TYPE_WRLOCK:
2723	case L9PL_LOCK_TYPE_UNLOCK:
2724		break;
2725	default:
2726		return (EINVAL);
2727	}
2728
2729	req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS;
2730	return (0);
2731}
2732
2733static int
2734fs_getlock(void *softc __unused, struct l9p_request *req)
2735{
2736
2737	/*
2738	 * Client wants to see if a request to lock a region would
2739	 * block.  This is, of course, not atomic anyway, so the
2740	 * op is useless.  QEMU simply says "unlocked!", so we do
2741	 * too.
2742	 */
2743	switch (req->lr_req.getlock.type) {
2744	case L9PL_LOCK_TYPE_RDLOCK:
2745	case L9PL_LOCK_TYPE_WRLOCK:
2746	case L9PL_LOCK_TYPE_UNLOCK:
2747		break;
2748	default:
2749		return (EINVAL);
2750	}
2751
2752	req->lr_resp.getlock = req->lr_req.getlock;
2753	req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK;
2754	req->lr_resp.getlock.client_id = strdup("");  /* XXX what should go here? */
2755	return (0);
2756}
2757
2758static int
2759fs_link(void *softc __unused, struct l9p_request *req)
2760{
2761	struct l9p_fid *dir;
2762	struct fs_fid *file;
2763	struct fs_fid *dirf;
2764	struct stat fst, tdst;
2765	int32_t op;
2766	char *name;
2767	char newname[MAXPATHLEN];
2768	int error;
2769
2770	/* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */
2771	dir = req->lr_fid2;
2772	dirf = dir->lo_aux;
2773	assert(dirf != NULL);
2774
2775	name = req->lr_req.tlink.name;
2776	error = fs_buildname(dir, name, newname, sizeof(newname));
2777	if (error)
2778		return (error);
2779
2780	file = req->lr_fid->lo_aux;
2781	assert(file != NULL);
2782
2783	if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 ||
2784	    fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0)
2785		return (errno);
2786	if (S_ISDIR(fst.st_mode))
2787		return (EISDIR);
2788	fillacl(dirf);
2789	op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
2790	error = check_access(op,
2791	    dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1);
2792	if (error)
2793		return (error);
2794
2795	if (linkat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
2796	    newname, 0) != 0)
2797		error = errno;
2798	else
2799		dropacl(file);
2800
2801	return (error);
2802}
2803
2804static int
2805fs_mkdir(void *softc, struct l9p_request *req)
2806{
2807	struct l9p_fid *dir;
2808	struct stat st;
2809	mode_t perm;
2810	gid_t gid;
2811	char *name;
2812	int error;
2813
2814	dir = req->lr_fid;
2815	name = req->lr_req.tmkdir.name;
2816	perm = (mode_t)req->lr_req.tmkdir.mode;
2817	gid = req->lr_req.tmkdir.gid;
2818
2819	error = fs_imkdir(softc, dir, name, false, perm, gid, &st);
2820	if (error == 0)
2821		generate_qid(&st, &req->lr_resp.rmkdir.qid);
2822	return (error);
2823}
2824
2825static int
2826fs_renameat(void *softc, struct l9p_request *req)
2827{
2828	struct fs_softc *sc = softc;
2829	struct l9p_fid *olddir, *newdir;
2830	struct l9p_acl *facl;
2831	struct fs_fid *off, *nff;
2832	struct stat odst, ndst, fst;
2833	int32_t op;
2834	bool reparenting;
2835	char *onp, *nnp;
2836	char onb[MAXPATHLEN], nnb[MAXPATHLEN];
2837	int error;
2838
2839	if (sc->fs_readonly)
2840		return (EROFS);
2841
2842	olddir = req->lr_fid;
2843	newdir = req->lr_fid2;
2844	assert(olddir != NULL && newdir != NULL);
2845	off = olddir->lo_aux;
2846	nff = newdir->lo_aux;
2847	assert(off != NULL && nff != NULL);
2848
2849	onp = req->lr_req.trenameat.oldname;
2850	nnp = req->lr_req.trenameat.newname;
2851	error = fs_buildname(olddir, onp, onb, sizeof(onb));
2852	if (error)
2853		return (error);
2854	error = fs_buildname(newdir, nnp, nnb, sizeof(nnb));
2855	if (error)
2856		return (error);
2857	if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0)
2858		return (errno);
2859
2860	reparenting = olddir != newdir &&
2861	    strcmp(off->ff_name, nff->ff_name) != 0;
2862
2863	if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0)
2864		return (errno);
2865	if (!S_ISDIR(odst.st_mode))
2866		return (ENOTDIR);
2867	fillacl(off);
2868
2869	if (reparenting) {
2870		if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0)
2871			return (errno);
2872		if (!S_ISDIR(ndst.st_mode))
2873			return (ENOTDIR);
2874		facl = getacl(off, -1, onb);
2875		fillacl(nff);
2876
2877		error = check_access(L9P_ACOP_UNLINK,
2878		    off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1);
2879		l9p_acl_free(facl);
2880		if (error)
2881			return (error);
2882		op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY :
2883		    L9P_ACE_ADD_FILE;
2884		error = check_access(op,
2885		    nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1);
2886		if (error)
2887			return (error);
2888	}
2889
2890	if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb))
2891		error = errno;
2892
2893	return (error);
2894}
2895
2896/*
2897 * Unlink file in given directory, or remove directory in given
2898 * directory, based on flags.
2899 */
2900static int
2901fs_unlinkat(void *softc, struct l9p_request *req)
2902{
2903	struct fs_softc *sc = softc;
2904	struct l9p_acl *facl;
2905	struct l9p_fid *dir;
2906	struct fs_fid *dirff;
2907	struct stat dirst, fst;
2908	char *name;
2909	char newname[MAXPATHLEN];
2910	int error;
2911
2912	if (sc->fs_readonly)
2913		return (EROFS);
2914
2915	dir = req->lr_fid;
2916	dirff = dir->lo_aux;
2917	assert(dirff != NULL);
2918	name = req->lr_req.tunlinkat.name;
2919	error = fs_buildname(dir, name, newname, sizeof(newname));
2920	if (error)
2921		return (error);
2922	if (fstatat(dirff->ff_dirfd, newname, &fst, AT_SYMLINK_NOFOLLOW) != 0 ||
2923	    fstatat(dirff->ff_dirfd, dirff->ff_name, &dirst, AT_SYMLINK_NOFOLLOW) != 0)
2924		return (errno);
2925	fillacl(dirff);
2926	facl = getacl(dirff, -1, newname);
2927	error = check_access(L9P_ACOP_UNLINK,
2928	    dirff->ff_acl, &dirst, facl, &fst, dirff->ff_ai, (gid_t)-1);
2929	l9p_acl_free(facl);
2930	if (error)
2931		return (error);
2932
2933	if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR) {
2934		if (unlinkat(dirff->ff_dirfd, newname, AT_REMOVEDIR) != 0)
2935			error = errno;
2936	} else {
2937		if (unlinkat(dirff->ff_dirfd, newname, 0) != 0)
2938			error = errno;
2939	}
2940	return (error);
2941}
2942
2943static void
2944fs_freefid(void *softc __unused, struct l9p_fid *fid)
2945{
2946	struct fs_fid *f = fid->lo_aux;
2947	struct fs_authinfo *ai;
2948	uint32_t newcount;
2949
2950	if (f == NULL) {
2951		/* Nothing to do here */
2952		return;
2953	}
2954
2955	if (f->ff_fd != -1)
2956		close(f->ff_fd);
2957
2958	if (f->ff_dir)
2959		closedir(f->ff_dir);
2960
2961	pthread_mutex_destroy(&f->ff_mtx);
2962	free(f->ff_name);
2963	ai = f->ff_ai;
2964	l9p_acl_free(f->ff_acl);
2965	free(f);
2966	pthread_mutex_lock(&ai->ai_mtx);
2967	newcount = --ai->ai_refcnt;
2968	pthread_mutex_unlock(&ai->ai_mtx);
2969	if (newcount == 0) {
2970		/*
2971		 * We *were* the last ref, no one can have gained a ref.
2972		 */
2973		L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p",
2974		    (void *)ai);
2975		pthread_mutex_destroy(&ai->ai_mtx);
2976		free(ai);
2977	} else {
2978		L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
2979		    (void *)ai, (u_long)newcount);
2980	}
2981}
2982
2983int
2984l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
2985{
2986	struct l9p_backend *backend;
2987	struct fs_softc *sc;
2988	int error;
2989#if defined(WITH_CASPER)
2990	cap_channel_t *capcas;
2991#endif
2992
2993	if (!fs_attach_mutex_inited) {
2994		error = pthread_mutex_init(&fs_attach_mutex, NULL);
2995		if (error) {
2996			errno = error;
2997			return (-1);
2998		}
2999		fs_attach_mutex_inited = true;
3000	}
3001
3002	backend = l9p_malloc(sizeof(*backend));
3003	backend->attach = fs_attach;
3004	backend->clunk = fs_clunk;
3005	backend->create = fs_create;
3006	backend->open = fs_open;
3007	backend->read = fs_read;
3008	backend->remove = fs_remove;
3009	backend->stat = fs_stat;
3010	backend->walk = fs_walk;
3011	backend->write = fs_write;
3012	backend->wstat = fs_wstat;
3013	backend->statfs = fs_statfs;
3014	backend->lopen = fs_lopen;
3015	backend->lcreate = fs_lcreate;
3016	backend->symlink = fs_symlink;
3017	backend->mknod = fs_mknod;
3018	backend->rename = fs_rename;
3019	backend->readlink = fs_readlink;
3020	backend->getattr = fs_getattr;
3021	backend->setattr = fs_setattr;
3022	backend->xattrwalk = fs_xattrwalk;
3023	backend->xattrcreate = fs_xattrcreate;
3024	backend->readdir = fs_readdir;
3025	backend->fsync = fs_fsync;
3026	backend->lock = fs_lock;
3027	backend->getlock = fs_getlock;
3028	backend->link = fs_link;
3029	backend->mkdir = fs_mkdir;
3030	backend->renameat = fs_renameat;
3031	backend->unlinkat = fs_unlinkat;
3032	backend->freefid = fs_freefid;
3033
3034	sc = l9p_malloc(sizeof(*sc));
3035	sc->fs_rootfd = rootfd;
3036	sc->fs_readonly = ro;
3037	backend->softc = sc;
3038
3039#if defined(WITH_CASPER)
3040	capcas = cap_init();
3041	if (capcas == NULL)
3042		return (-1);
3043
3044	sc->fs_cappwd = cap_service_open(capcas, "system.pwd");
3045	if (sc->fs_cappwd == NULL)
3046		return (-1);
3047
3048	sc->fs_capgrp = cap_service_open(capcas, "system.grp");
3049	if (sc->fs_capgrp == NULL)
3050		return (-1);
3051
3052	cap_setpassent(sc->fs_cappwd, 1);
3053	cap_setgroupent(sc->fs_capgrp, 1);
3054	cap_close(capcas);
3055#else
3056	setpassent(1);
3057#endif
3058
3059	*backendp = backend;
3060	return (0);
3061}
3062