/*
 * Copyright 2016 Chris Torek <torek@ixsystems.com>
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General ACL support for 9P2000.L.
 *
 * We mostly use Linux's xattr name space and nfs4 ACL bits, as
 * these are the most general forms available.
 *
 * Linux requests attributes named
 *
 *     "system.posix_acl_default"
 *     "system.posix_acl_access"
 *
 * to get POSIX style ACLs, and:
 *
 *     "system.nfs4_acl"
 *
 * to get NFSv4 style ACLs.  The v9fs client does not explicitly
 * ask for the latter, but if you use the Ubuntu nfs4-acl-tools
 * package, it should be able to read and write these.
 *
 * For the record, the Linux kernel source code also shows:
 *
 *  - Lustre uses "trusted.*", with "*" matching "lov", "lma",
 *    "lmv", "dmv", "link", "fid", "version", "som", "hsm", and
 *    "lfsck_namespace".
 *
 *  - ceph has a name tree of the form "ceph.<type>.<name>" with
 *    <type,name> pairs like <"dir","entries">, <"dir","files">,
 *    <"file","layout">, and so on.
 *
 *  - ext4 uses the POSIX names, plus some special ext4-specific
 *    goop that might not get externalized.
 *
 *  - NFS uses both the POSIX names and the NFSv4 ACLs.  However,
 *    what it mainly does is have nfsd generate fake NFSv4 ACLs
 *    from POSIX ACLs.  If you run an NFS client, the client
 *    relies on the server actually implementing the ACLs, and
 *    lets nfs4-acl-tools read and write the system.nfs4_acl xattr
 *    data.  If you run an NFS server off, e.g., an ext4 file system,
 *    the server looks for the system.nfs4_acl xattr, serves that
 *    out if found, and otherwise just generates the fakes.
 *
 *  - "security.*" and "selinux.*" are reserved.
 *
 *  - "security.capability" is the name for capabilities.
 *
 *  - sockets use "system.sockprotoname".
 */

/*
 * Select the native ACL flavors available on this platform.
 *
 * HAVE_POSIX_ACLS is defined on both Darwin and FreeBSD; the
 * HAVE_DARWIN_ACLS / HAVE_FREEBSD_ACLS macros gate the matching
 * system-specific converter prototypes declared in this header.
 * On any other platform none of these macros is defined here.
 */
#if defined(__APPLE__)
  #define HAVE_POSIX_ACLS
  #define HAVE_DARWIN_ACLS
#endif

#if defined(__FreeBSD__)
  #define HAVE_POSIX_ACLS
  #define HAVE_FREEBSD_ACLS
#endif

#include <sys/types.h>
#include <sys/acl.h>		/* XXX assumes existence of sys/acl.h */
#include <sys/stat.h>

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * An ACL consists of a number of ACEs that grant some kind of
 * "allow" or "deny" to some specific entity.
 *
 * The number of ACEs is potentially unlimited, although in practice
 * ACLs tend not to be that long.
 *
 * It's the responsibility of the back-end to supply the ACL
 * for each test.  However, the ACL may be in some sort of
 * system-specific form.  It's the responsibility of some
 * (system-specific) code to translate it to *this* form, after
 * which the backend may use l9p_acl_check_access() to get
 * access granted or denied (and, eventually, audits and alarms
 * recorded and raised, although that's yet to be designed).
 *
 * The reason for all this faffing-about with formats is so that
 * we can *report* the ACLs using Linux 9p style xattrs.
 */

/* Forward declarations. */
struct l9p_acl;
struct l9p_fid;

/*
 * Release an ACL.
 *
 * NOTE(review): presumably this frees an ACL produced by one of the
 * system-specific converters; the implementation is not in this
 * header, so confirm ownership rules there.
 */
void l9p_acl_free(struct l9p_acl *acl);

/*
 * An ACL is made up of ACEs.
 *
 * Each ACE has:
 *
 *   - a type: allow, deny, audit, alarm
 *   - a set of flags
 *   - permission bits: a "mask"
 *   - an optional, nominally-variable-length identity
 *
 * The last part is especially tricky and currently has limited
 * support here: it's always a 16 byte field on Darwin, and just
 * a uint32_t on BSD (should be larger, really).  Linux supports
 * very large, actually-variable-size values; we'll deal with
 * this later, maybe.
 *
 * We will define the mask first, below, since these are also the bits
 * passed in for the op argument to l9p_acl_check_access().
 */

/*
 * ACL entry mask bits.  The same bits are passed as the op
 * argument of l9p_acl_check_access().
 *
 * NB: not every bit is implemented, but they are all here because
 * they are all defined as part of an NFSv4 ACL entry, which is
 * more or less a superset of a POSIX ACL entry.  This means you
 * can put a complete NFSv4 ACL in and we can reproduce it.
 *
 * Note that the LIST_DIRECTORY, ADD_FILE, and ADD_SUBDIRECTORY bits
 * apply only to a directory, while the READ_DATA, WRITE_DATA, and
 * APPEND_DATA bits apply only to a file.  Each directory bit shares
 * its value with the corresponding file bit.  See aca_parent/aca_child
 * below.
 */
#define	L9P_ACE_READ_DATA		0x00001
#define	L9P_ACE_LIST_DIRECTORY		0x00001 /* same as READ_DATA */
#define	L9P_ACE_WRITE_DATA		0x00002
#define	L9P_ACE_ADD_FILE		0x00002 /* same as WRITE_DATA */
#define	L9P_ACE_APPEND_DATA		0x00004
#define	L9P_ACE_ADD_SUBDIRECTORY	0x00004 /* same as APPEND_DATA */
#define	L9P_ACE_READ_NAMED_ATTRS	0x00008
#define	L9P_ACE_WRITE_NAMED_ATTRS	0x00010
#define	L9P_ACE_EXECUTE			0x00020
#define	L9P_ACE_DELETE_CHILD		0x00040
#define	L9P_ACE_READ_ATTRIBUTES		0x00080
#define	L9P_ACE_WRITE_ATTRIBUTES	0x00100
#define	L9P_ACE_WRITE_RETENTION		0x00200 /* not used here */
#define	L9P_ACE_WRITE_RETENTION_HOLD	0x00400 /* not used here */
/*					0x00800 unused? */
#define	L9P_ACE_DELETE			0x01000
#define	L9P_ACE_READ_ACL		0x02000
#define	L9P_ACE_WRITE_ACL		0x04000
#define	L9P_ACE_WRITE_OWNER		0x08000
#define	L9P_ACE_SYNCHRONIZE		0x10000 /* not used here */

/*
 * This is not an ACE bit, but is used with the access checking
 * below.  It represents a request to unlink (delete child /
 * delete) an entity, and is equivalent to asking for *either*
 * (not both) permission.
 */
#define	L9P_ACOP_UNLINK (L9P_ACE_DELETE_CHILD | L9P_ACE_DELETE)

/*
 * Access checking takes a lot of arguments, so they are
 * collected into a "struct" here.
 *
 * The aca_parent and aca_pstat fields may/must be NULL if the
 * operation itself does not involve "directory" permissions.
 * The aca_child and aca_cstat fields may/must be NULL if the
 * operation does not involve anything *but* a directory.  This
 * is how we decide whether you're interested in L9P_ACE_READ_DATA
 * vs L9P_ACE_LIST_DIRECTORY, for instance.
 *
 * Note that it's OK for both parent and child to be directories
 * (as is the case when we're adding or deleting a subdirectory).
 */
struct l9p_acl_check_args {
	uid_t	aca_uid;		/* the uid that is requesting access */
	gid_t	aca_gid;		/* the gid that is requesting access */
	gid_t	*aca_groups;		/* the additional group-set, if any */
	size_t	aca_ngroups;		/* number of groups in group-set */
	struct l9p_acl *aca_parent;	/* ACLs associated with parent/dir */
	struct stat *aca_pstat;		/* stat data for parent/dir */
	struct l9p_acl *aca_child;	/* ACLs associated with file */
	struct stat *aca_cstat;		/* stat data for file */
	int	aca_aclmode;		/* L9P_ACM_* mode checking bits, see below */
	bool	aca_superuser;		/* always allow uid==0 in STAT_MODE */
};

/*
 * Access checking mode bits in aca_aclmode.  If you enable
 * ACLs, they are used first, optionally with ZFS style ACLs.
 * This means that even if aca_superuser is set, if an ACL denies
 * permission to uid 0, permission is really denied.
 *
 * NFS style ACLs run before POSIX style ACLs (though POSIX
 * ACLs aren't done yet anyway).
 *
 * N.B.: you probably want L9P_ACM_ZFS_ACL, especially when operating
 * with a ZFS file system on FreeBSD.
 */
#define	L9P_ACM_NFS_ACL		0x0001	/* enable NFS ACL checking */
#define	L9P_ACM_ZFS_ACL		0x0002	/* use ZFS ACL unlink semantics */
#define	L9P_ACM_POSIX_ACL	0x0004	/* enable POSIX ACL checking (notyet) */
#define	L9P_ACM_STAT_MODE	0x0008	/* enable st_mode bits */

/*
 * Requests to access some file or directory must provide:
 *
 *  - An operation (op).  This should usually be just one bit from
 *    the L9P_ACE_* bit-sets above, or our special L9P_ACOP_UNLINK.
 *    For a few file-open operations it may be multiple bits,
 *    e.g., both read and write data.
 *  - The identity of the accessor: uid + gid + gid-set.
 *  - The type of access desired: this may be multiple bits.
 *  - The parent directory, if applicable.
 *  - The child file/dir being accessed, if applicable.
 *  - stat data for parent and/or child, if applicable.
 *
 * Everything except the operation is passed in *args.
 *
 * The ACLs and/or stat data of the parent and/or child get used
 * here, so the caller must provide them.  We should have a way to
 * cache these on fids, but not yet.  The parent and child
 * arguments are a bit tricky; see the code in genacl.c.
 *
 * NOTE(review): the meaning of the int return value is not stated
 * in this header; confirm the convention in genacl.c.
 */
int l9p_acl_check_access(int32_t op, struct l9p_acl_check_args *args);

/*
 * When falling back to POSIX ACL or Unix-style permissions
 * testing, it's nice to collapse the above detailed permissions
 * into simple read/write/execute bits (value 0..7).  We provide
 * a small utility function that does this.
 *
 * The argument is a mask of L9P_ACE_* bits; the result is the
 * collapsed rwx value in the range 0..7.
 */
int l9p_ace_mask_to_rwx(int32_t mask);

/*
 * The rest of the data in an ACE.
 */

/* ACE type, stored in ace_type: what a matching entry does. */
#define	L9P_ACET_ACCESS_ALLOWED		0
#define	L9P_ACET_ACCESS_DENIED		1
#define	L9P_ACET_SYSTEM_AUDIT		2
#define	L9P_ACET_SYSTEM_ALARM		3

/* ACE flags, stored in ace_flags (a bit set; all values fit in 16 bits). */
#define	L9P_ACEF_FILE_INHERIT_ACE		0x001
#define	L9P_ACEF_DIRECTORY_INHERIT_ACE		0x002
#define	L9P_ACEF_NO_PROPAGATE_INHERIT_ACE	0x004
#define	L9P_ACEF_INHERIT_ONLY_ACE		0x008
#define	L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG	0x010
#define	L9P_ACEF_FAILED_ACCESS_ACE_FLAG		0x020
#define	L9P_ACEF_IDENTIFIER_GROUP		0x040
#define	L9P_ACEF_OWNER				0x080
#define	L9P_ACEF_GROUP				0x100
#define	L9P_ACEF_EVERYONE			0x200

/*
 * Size in bytes of the fixed identity buffer carried by each ACE:
 * 16 on Darwin, 4 everywhere else.
 */
#if defined(__APPLE__)
#  define L9P_ACE_IDSIZE 16 /* but, how do we map Darwin uuid? */
#else
#  define L9P_ACE_IDSIZE 4
#endif

/*
 * One access control entry: type, flags, permission mask, and the
 * identity the entry applies to.
 */
struct l9p_ace {
	uint16_t ace_type;		/* ACL entry type (L9P_ACET_*) */
	uint16_t ace_flags;		/* ACL entry flags (L9P_ACEF_*) */
	uint32_t ace_mask;		/* ACL entry mask (L9P_ACE_*) */
	uint32_t ace_idsize;		/* length of ace_idbytes */
	unsigned char ace_idbytes[L9P_ACE_IDSIZE];
};

/*
 * An ACL: a small header followed by a variable-length array of ACEs
 * (a C99 flexible array member, so the structure must be allocated
 * with room for the entries).
 *
 * NOTE(review): acl_aceasize is presumably counted in entries rather
 * than bytes -- confirm against the allocation code.
 */
#define	L9P_ACLTYPE_NFSv4	1	/* currently the only valid type */
struct l9p_acl {
	uint32_t acl_acetype;		/* reserved for future expansion */
	uint32_t acl_nace;		/* number of occupied ACEs */
	uint32_t acl_aceasize;		/* actual size of ACE array */
	struct l9p_ace acl_aces[];	/* variable length ACE array */
};

/*
 * These are the system-specific converters.
 *
 * Right now the backend needs to just find BSD NFSv4 ACLs
 * and convert them before each operation that needs to be
 * tested.
 *
 * Each converter takes the platform's acl_t and returns a pointer
 * to a struct l9p_acl.  Only the converter matching the HAVE_*_ACLS
 * macro for the current platform is declared; the POSIX converter
 * is compiled out for now.
 */
#if defined(HAVE_DARWIN_ACLS)
struct l9p_acl *l9p_darwin_nfsv4acl_to_acl(acl_t acl);
#endif

#if defined(HAVE_FREEBSD_ACLS)
struct l9p_acl *l9p_freebsd_nfsv4acl_to_acl(acl_t acl);
#endif

#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
struct l9p_acl *l9p_posix_acl_to_acl(acl_t acl);
#endif
