zfs_ioctl.c revision 290756
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
25 * All rights reserved.
26 * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
27 * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved.
28 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 */
34
35/*
36 * ZFS ioctls.
37 *
38 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
39 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
40 *
41 * There are two ways that we handle ioctls: the legacy way where almost
42 * all of the logic is in the ioctl callback, and the new way where most
43 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
44 *
45 * Non-legacy ioctls should be registered by calling
46 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
47 * from userland by lzc_ioctl().
48 *
49 * The registration arguments are as follows:
50 *
51 * const char *name
52 *   The name of the ioctl.  This is used for history logging.  If the
53 *   ioctl returns successfully (the callback returns 0), and allow_log
54 *   is true, then a history log entry will be recorded with the input &
55 *   output nvlists.  The log entry can be printed with "zpool history -i".
56 *
57 * zfs_ioc_t ioc
58 *   The ioctl request number, which userland will pass to ioctl(2).
59 *   The ioctl numbers can change from release to release, because
60 *   the caller (libzfs) must be matched to the kernel.
61 *
62 * zfs_secpolicy_func_t *secpolicy
63 *   This function will be called before the zfs_ioc_func_t, to
64 *   determine if this operation is permitted.  It should return EPERM
65 *   on failure, and 0 on success.  Checks include determining if the
66 *   dataset is visible in this zone, and if the user has either all
67 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
68 *   to do this operation on this dataset with "zfs allow".
69 *
70 * zfs_ioc_namecheck_t namecheck
71 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
72 *   name, a dataset name, or nothing.  If the name is not well-formed,
73 *   the ioctl will fail and the callback will not be called.
74 *   Therefore, the callback can assume that the name is well-formed
75 *   (e.g. is null-terminated, doesn't have more than one '@' character,
76 *   doesn't have invalid characters).
77 *
78 * zfs_ioc_poolcheck_t pool_check
79 *   This specifies requirements on the pool state.  If the pool does
80 *   not meet them (is suspended or is readonly), the ioctl will fail
81 *   and the callback will not be called.  If any checks are specified
82 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
83 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
84 *   POOL_CHECK_READONLY).
85 *
86 * boolean_t smush_outnvlist
87 *   If smush_outnvlist is true, then the output is presumed to be a
88 *   list of errors, and it will be "smushed" down to fit into the
89 *   caller's buffer, by removing some entries and replacing them with a
90 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
91 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
92 *   outnvlist does not fit into the userland-provided buffer, then the
93 *   ioctl will fail with ENOMEM.
94 *
95 * zfs_ioc_func_t *func
96 *   The callback function that will perform the operation.
97 *
98 *   The callback should return 0 on success, or an error number on
99 *   failure.  If the function fails, the userland ioctl will return -1,
100 *   and errno will be set to the callback's return value.  The callback
101 *   will be called with the following arguments:
102 *
103 *   const char *name
104 *     The name of the pool or dataset to operate on, from
105 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
106 *     expected type (pool, dataset, or none).
107 *
108 *   nvlist_t *innvl
109 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
110 *     NULL if no input nvlist was provided.  Changes to this nvlist are
111 *     ignored.  If the input nvlist could not be deserialized, the
112 *     ioctl will fail and the callback will not be called.
113 *
114 *   nvlist_t *outnvl
115 *     The output nvlist, initially empty.  The callback can fill it in,
116 *     and it will be returned to userland by serializing it into
117 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
118 *     fails (e.g. because the caller didn't supply a large enough
119 *     buffer), then the overall ioctl will fail.  See the
120 *     'smush_outnvlist' argument above for additional behaviors.
121 *
122 *     There are two typical uses of the output nvlist:
123 *       - To return state, e.g. property values.  In this case,
124 *         smush_outnvlist should be false.  If the buffer was not large
125 *         enough, the caller will reallocate a larger buffer and try
126 *         the ioctl again.
127 *
128 *       - To return multiple errors from an ioctl which makes on-disk
129 *         changes.  In this case, smush_outnvlist should be true.
130 *         Ioctls which make on-disk modifications should generally not
131 *         use the outnvl if they succeed, because the caller can not
132 *         distinguish between the operation failing, and
133 *         deserialization failing.
134 */
135
136#include <sys/types.h>
137#include <sys/param.h>
138#include <sys/systm.h>
139#include <sys/conf.h>
140#include <sys/kernel.h>
141#include <sys/lock.h>
142#include <sys/malloc.h>
143#include <sys/mutex.h>
144#include <sys/proc.h>
145#include <sys/errno.h>
146#include <sys/uio.h>
147#include <sys/buf.h>
148#include <sys/file.h>
149#include <sys/kmem.h>
150#include <sys/conf.h>
151#include <sys/cmn_err.h>
152#include <sys/stat.h>
153#include <sys/zfs_ioctl.h>
154#include <sys/zfs_vfsops.h>
155#include <sys/zfs_znode.h>
156#include <sys/zap.h>
157#include <sys/spa.h>
158#include <sys/spa_impl.h>
159#include <sys/vdev.h>
160#include <sys/dmu.h>
161#include <sys/dsl_dir.h>
162#include <sys/dsl_dataset.h>
163#include <sys/dsl_prop.h>
164#include <sys/dsl_deleg.h>
165#include <sys/dmu_objset.h>
166#include <sys/dmu_impl.h>
167#include <sys/dmu_tx.h>
168#include <sys/sunddi.h>
169#include <sys/policy.h>
170#include <sys/zone.h>
171#include <sys/nvpair.h>
172#include <sys/mount.h>
173#include <sys/taskqueue.h>
174#include <sys/sdt.h>
175#include <sys/varargs.h>
176#include <sys/fs/zfs.h>
177#include <sys/zfs_ctldir.h>
178#include <sys/zfs_dir.h>
179#include <sys/zfs_onexit.h>
180#include <sys/zvol.h>
181#include <sys/dsl_scan.h>
182#include <sys/dmu_objset.h>
183#include <sys/dmu_send.h>
184#include <sys/dsl_destroy.h>
185#include <sys/dsl_bookmark.h>
186#include <sys/dsl_userhold.h>
187#include <sys/zfeature.h>
188
189#include "zfs_namecheck.h"
190#include "zfs_prop.h"
191#include "zfs_deleg.h"
192#include "zfs_comutil.h"
193#include "zfs_ioctl_compat.h"
194
/* Compile-time check: zfs_cmd_t must be smaller than IOCPARM_MAX. */
195CTASSERT(sizeof(zfs_cmd_t) < IOCPARM_MAX);
196
/* NOTE(review): presumably the /dev/zfs character device node - confirm. */
197static struct cdev *zfsdev;
198
/* Defined outside this file. */
199extern void zfs_init(void);
200extern void zfs_fini(void);
201
/*
 * Key variables.  zfs_allow_log_key is read with tsd_get() by
 * zfs_secpolicy_log_history(), which refuses the request when no value is
 * set under it.  NOTE(review): the other two look like thread-specific-data
 * keys as well - confirm against their users.
 */
202uint_t zfs_fsyncer_key;
203extern uint_t rrw_tsd_key;
204static uint_t zfs_allow_log_key;
205
/*
 * Callback signatures stored in zfs_ioc_vec_t:
 *
 *   zfs_ioc_legacy_func_t - legacy handler; receives only the zfs_cmd_t.
 *   zfs_ioc_func_t        - new-style handler; receives (name, innvl, outnvl)
 *                           as described in the comment at the top of the
 *                           file.
 *   zfs_secpolicy_func_t  - permission check; receives the zfs_cmd_t, the
 *                           input nvlist and the caller's credentials.
 *
 * All three return an int: 0 on success, an errno value on failure.
 */
206typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
207typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
208typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
209
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool name,
 * or a dataset name.
 */
210typedef enum {
211	NO_NAME,
212	POOL_NAME,
213	DATASET_NAME
214} zfs_ioc_namecheck_t;
215
/*
 * Pool-state requirements for an ioctl.  The values are distinct bits, so
 * checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
 * POOL_CHECK_READONLY).  Note that POOL_CHECK_NONE is itself a non-zero bit
 * (1 << 0), not 0.
 */
216typedef enum {
217	POOL_CHECK_NONE		= 1 << 0,
218	POOL_CHECK_SUSPENDED	= 1 << 1,
219	POOL_CHECK_READONLY	= 1 << 2,
220} zfs_ioc_poolcheck_t;
221
/*
 * Per-ioctl registration record.
 *
 *   zvec_legacy_func     - legacy-style handler
 *   zvec_func            - new-style (nvlist based) handler
 *   zvec_secpolicy       - permission check run before the handler
 *   zvec_namecheck       - what zc_name must contain
 *   zvec_allow_log       - whether a successful call may be history-logged
 *   zvec_pool_check      - required pool state (zfs_ioc_poolcheck_t bits)
 *   zvec_smush_outnvlist - whether the output nvlist may be "smushed" to fit
 *                          the caller's buffer
 *   zvec_name            - ioctl name, used for history logging
 *
 * NOTE(review): presumably only one of zvec_legacy_func / zvec_func is set
 * for a given entry - confirm against the registration code.
 */
222typedef struct zfs_ioc_vec {
223	zfs_ioc_legacy_func_t	*zvec_legacy_func;
224	zfs_ioc_func_t		*zvec_func;
225	zfs_secpolicy_func_t	*zvec_secpolicy;
226	zfs_ioc_namecheck_t	zvec_namecheck;
227	boolean_t		zvec_allow_log;
228	zfs_ioc_poolcheck_t	zvec_pool_check;
229	boolean_t		zvec_smush_outnvlist;
230	const char		*zvec_name;
231} zfs_ioc_vec_t;
232
/*
 * Delegated-permission names for the user/group space properties.  Because
 * the table is looked up by zfs_userquota_prop_t value, the entry order has
 * to follow that enum.
 */
233/* This array is indexed by zfs_userquota_prop_t */
234static const char *userquota_perms[] = {
235	ZFS_DELEG_PERM_USERUSED,
236	ZFS_DELEG_PERM_USERQUOTA,
237	ZFS_DELEG_PERM_GROUPUSED,
238	ZFS_DELEG_PERM_GROUPQUOTA,
239};
240
/*
 * Forward declarations.  Everything here is file-local except
 * zfs_set_prop_nvlist(), which has external linkage.
 */
241static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
242static int zfs_check_settable(const char *name, nvpair_t *property,
243    cred_t *cr);
244static int zfs_check_clearable(char *dataset, nvlist_t *props,
245    nvlist_t **errors);
246static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
247    boolean_t *);
248int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
249static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
250
251static void zfsdev_close(void *data);
252
253static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
254
255/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
256void
257__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
258{
259	const char *newfile;
260	char buf[512];
261	va_list adx;
262
263	/*
264	 * Get rid of annoying "../common/" prefix to filename.
265	 */
266	newfile = strrchr(file, '/');
267	if (newfile != NULL) {
268		newfile = newfile + 1; /* Get rid of leading / */
269	} else {
270		newfile = file;
271	}
272
273	va_start(adx, fmt);
274	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
275	va_end(adx);
276
277	/*
278	 * To get this data, use the zfs-dprintf probe as so:
279	 * dtrace -q -n 'zfs-dprintf \
280	 *	/stringof(arg0) == "dbuf.c"/ \
281	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
282	 * arg0 = file name
283	 * arg1 = function name
284	 * arg2 = line number
285	 * arg3 = message
286	 */
287	DTRACE_PROBE4(zfs__dprintf,
288	    char *, newfile, char *, func, int, line, char *, buf);
289}
290
291static void
292history_str_free(char *buf)
293{
294	kmem_free(buf, HIS_MAX_RECORD_LEN);
295}
296
297static char *
298history_str_get(zfs_cmd_t *zc)
299{
300	char *buf;
301
302	if (zc->zc_history == 0)
303		return (NULL);
304
305	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
306	if (copyinstr((void *)(uintptr_t)zc->zc_history,
307	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
308		history_str_free(buf);
309		return (NULL);
310	}
311
312	buf[HIS_MAX_RECORD_LEN -1] = '\0';
313
314	return (buf);
315}
316
317/*
318 * Check to see if the named dataset is currently defined as bootable
319 */
320static boolean_t
321zfs_is_bootfs(const char *name)
322{
323	objset_t *os;
324
325	if (dmu_objset_hold(name, FTAG, &os) == 0) {
326		boolean_t ret;
327		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
328		dmu_objset_rele(os, FTAG);
329		return (ret);
330	}
331	return (B_FALSE);
332}
333
334/*
335 * Return non-zero if the spa version is less than requested version.
336 */
337static int
338zfs_earlier_version(const char *name, int version)
339{
340	spa_t *spa;
341
342	if (spa_open(name, &spa, FTAG) == 0) {
343		if (spa_version(spa) < version) {
344			spa_close(spa, FTAG);
345			return (1);
346		}
347		spa_close(spa, FTAG);
348	}
349	return (0);
350}
351
352/*
353 * Return TRUE if the ZPL version is less than requested version.
354 */
355static boolean_t
356zpl_earlier_version(const char *name, int version)
357{
358	objset_t *os;
359	boolean_t rc = B_TRUE;
360
361	if (dmu_objset_hold(name, FTAG, &os) == 0) {
362		uint64_t zplversion;
363
364		if (dmu_objset_type(os) != DMU_OST_ZFS) {
365			dmu_objset_rele(os, FTAG);
366			return (B_TRUE);
367		}
368		/* XXX reading from non-owned objset */
369		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
370			rc = zplversion < version;
371		dmu_objset_rele(os, FTAG);
372	}
373	return (rc);
374}
375
376static void
377zfs_log_history(zfs_cmd_t *zc)
378{
379	spa_t *spa;
380	char *buf;
381
382	if ((buf = history_str_get(zc)) == NULL)
383		return;
384
385	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
386		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
387			(void) spa_history_log(spa, buf);
388		spa_close(spa, FTAG);
389	}
390	history_str_free(buf);
391}
392
393/*
394 * Policy for top-level read operations (list pools).  Requires no privileges,
395 * and can be used in the local zone, as there is no associated dataset.
396 */
397/* ARGSUSED */
398static int
399zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
400{
401	return (0);
402}
403
404/*
405 * Policy for dataset read operations (list children, get statistics).  Requires
406 * no privileges, but must be visible in the local zone.
407 */
408/* ARGSUSED */
409static int
410zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
411{
412	if (INGLOBALZONE(curthread) ||
413	    zone_dataset_visible(zc->zc_name, NULL))
414		return (0);
415
416	return (SET_ERROR(ENOENT));
417}
418
419static int
420zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
421{
422	int writable = 1;
423
424	/*
425	 * The dataset must be visible by this zone -- check this first
426	 * so they don't see EPERM on something they shouldn't know about.
427	 */
428	if (!INGLOBALZONE(curthread) &&
429	    !zone_dataset_visible(dataset, &writable))
430		return (SET_ERROR(ENOENT));
431
432	if (INGLOBALZONE(curthread)) {
433		/*
434		 * If the fs is zoned, only root can access it from the
435		 * global zone.
436		 */
437		if (secpolicy_zfs(cr) && zoned)
438			return (SET_ERROR(EPERM));
439	} else {
440		/*
441		 * If we are in a local zone, the 'zoned' property must be set.
442		 */
443		if (!zoned)
444			return (SET_ERROR(EPERM));
445
446		/* must be writable by this zone */
447		if (!writable)
448			return (SET_ERROR(EPERM));
449	}
450	return (0);
451}
452
453static int
454zfs_dozonecheck(const char *dataset, cred_t *cr)
455{
456	uint64_t zoned;
457
458	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
459		return (SET_ERROR(ENOENT));
460
461	return (zfs_dozonecheck_impl(dataset, zoned, cr));
462}
463
464static int
465zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
466{
467	uint64_t zoned;
468
469	if (dsl_prop_get_int_ds(ds, "jailed", &zoned))
470		return (SET_ERROR(ENOENT));
471
472	return (zfs_dozonecheck_impl(dataset, zoned, cr));
473}
474
475static int
476zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
477    const char *perm, cred_t *cr)
478{
479	int error;
480
481	error = zfs_dozonecheck_ds(name, ds, cr);
482	if (error == 0) {
483		error = secpolicy_zfs(cr);
484		if (error != 0)
485			error = dsl_deleg_access_impl(ds, perm, cr);
486	}
487	return (error);
488}
489
490static int
491zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
492{
493	int error;
494	dsl_dataset_t *ds;
495	dsl_pool_t *dp;
496
497	error = dsl_pool_hold(name, FTAG, &dp);
498	if (error != 0)
499		return (error);
500
501	error = dsl_dataset_hold(dp, name, FTAG, &ds);
502	if (error != 0) {
503		dsl_pool_rele(dp, FTAG);
504		return (error);
505	}
506
507	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
508
509	dsl_dataset_rele(ds, FTAG);
510	dsl_pool_rele(dp, FTAG);
511	return (error);
512}
513
514#ifdef SECLABEL
515/*
516 * Policy for setting the security label property.
517 *
518 * Returns 0 for success, non-zero for access and other errors.
519 */
520static int
521zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
522{
523	char		ds_hexsl[MAXNAMELEN];
524	bslabel_t	ds_sl, new_sl;
525	boolean_t	new_default = FALSE;
526	uint64_t	zoned;
527	int		needed_priv = -1;
528	int		error;
529
530	/* First get the existing dataset label. */
531	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
532	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
533	if (error != 0)
534		return (SET_ERROR(EPERM));
535
536	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
537		new_default = TRUE;
538
539	/* The label must be translatable */
540	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
541		return (SET_ERROR(EINVAL));
542
543	/*
544	 * In a non-global zone, disallow attempts to set a label that
545	 * doesn't match that of the zone; otherwise no other checks
546	 * are needed.
547	 */
548	if (!INGLOBALZONE(curproc)) {
549		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
550			return (SET_ERROR(EPERM));
551		return (0);
552	}
553
554	/*
555	 * For global-zone datasets (i.e., those whose zoned property is
556	 * "off", verify that the specified new label is valid for the
557	 * global zone.
558	 */
559	if (dsl_prop_get_integer(name,
560	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
561		return (SET_ERROR(EPERM));
562	if (!zoned) {
563		if (zfs_check_global_label(name, strval) != 0)
564			return (SET_ERROR(EPERM));
565	}
566
567	/*
568	 * If the existing dataset label is nondefault, check if the
569	 * dataset is mounted (label cannot be changed while mounted).
570	 * Get the zfsvfs; if there isn't one, then the dataset isn't
571	 * mounted (or isn't a dataset, doesn't exist, ...).
572	 */
573	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
574		objset_t *os;
575		static char *setsl_tag = "setsl_tag";
576
577		/*
578		 * Try to own the dataset; abort if there is any error,
579		 * (e.g., already mounted, in use, or other error).
580		 */
581		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
582		    setsl_tag, &os);
583		if (error != 0)
584			return (SET_ERROR(EPERM));
585
586		dmu_objset_disown(os, setsl_tag);
587
588		if (new_default) {
589			needed_priv = PRIV_FILE_DOWNGRADE_SL;
590			goto out_check;
591		}
592
593		if (hexstr_to_label(strval, &new_sl) != 0)
594			return (SET_ERROR(EPERM));
595
596		if (blstrictdom(&ds_sl, &new_sl))
597			needed_priv = PRIV_FILE_DOWNGRADE_SL;
598		else if (blstrictdom(&new_sl, &ds_sl))
599			needed_priv = PRIV_FILE_UPGRADE_SL;
600	} else {
601		/* dataset currently has a default label */
602		if (!new_default)
603			needed_priv = PRIV_FILE_UPGRADE_SL;
604	}
605
606out_check:
607	if (needed_priv != -1)
608		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
609	return (0);
610}
611#endif	/* SECLABEL */
612
613static int
614zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
615    cred_t *cr)
616{
617	char *strval;
618
619	/*
620	 * Check permissions for special properties.
621	 */
622	switch (prop) {
623	case ZFS_PROP_ZONED:
624		/*
625		 * Disallow setting of 'zoned' from within a local zone.
626		 */
627		if (!INGLOBALZONE(curthread))
628			return (SET_ERROR(EPERM));
629		break;
630
631	case ZFS_PROP_QUOTA:
632	case ZFS_PROP_FILESYSTEM_LIMIT:
633	case ZFS_PROP_SNAPSHOT_LIMIT:
634		if (!INGLOBALZONE(curthread)) {
635			uint64_t zoned;
636			char setpoint[MAXNAMELEN];
637			/*
638			 * Unprivileged users are allowed to modify the
639			 * limit on things *under* (ie. contained by)
640			 * the thing they own.
641			 */
642			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
643			    setpoint))
644				return (SET_ERROR(EPERM));
645			if (!zoned || strlen(dsname) <= strlen(setpoint))
646				return (SET_ERROR(EPERM));
647		}
648		break;
649
650	case ZFS_PROP_MLSLABEL:
651#ifdef SECLABEL
652		if (!is_system_labeled())
653			return (SET_ERROR(EPERM));
654
655		if (nvpair_value_string(propval, &strval) == 0) {
656			int err;
657
658			err = zfs_set_slabel_policy(dsname, strval, CRED());
659			if (err != 0)
660				return (err);
661		}
662#else
663		return (EOPNOTSUPP);
664#endif
665		break;
666	}
667
668	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
669}
670
671/* ARGSUSED */
672static int
673zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
674{
675	int error;
676
677	error = zfs_dozonecheck(zc->zc_name, cr);
678	if (error != 0)
679		return (error);
680
681	/*
682	 * permission to set permissions will be evaluated later in
683	 * dsl_deleg_can_allow()
684	 */
685	return (0);
686}
687
688/* ARGSUSED */
689static int
690zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
691{
692	return (zfs_secpolicy_write_perms(zc->zc_name,
693	    ZFS_DELEG_PERM_ROLLBACK, cr));
694}
695
696/* ARGSUSED */
697static int
698zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
699{
700	dsl_pool_t *dp;
701	dsl_dataset_t *ds;
702	char *cp;
703	int error;
704
705	/*
706	 * Generate the current snapshot name from the given objsetid, then
707	 * use that name for the secpolicy/zone checks.
708	 */
709	cp = strchr(zc->zc_name, '@');
710	if (cp == NULL)
711		return (SET_ERROR(EINVAL));
712	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
713	if (error != 0)
714		return (error);
715
716	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
717	if (error != 0) {
718		dsl_pool_rele(dp, FTAG);
719		return (error);
720	}
721
722	dsl_dataset_name(ds, zc->zc_name);
723
724	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
725	    ZFS_DELEG_PERM_SEND, cr);
726	dsl_dataset_rele(ds, FTAG);
727	dsl_pool_rele(dp, FTAG);
728
729	return (error);
730}
731
732/* ARGSUSED */
733static int
734zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
735{
736	return (zfs_secpolicy_write_perms(zc->zc_name,
737	    ZFS_DELEG_PERM_SEND, cr));
738}
739
740/* ARGSUSED */
741static int
742zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
743{
744	vnode_t *vp;
745	int error;
746
747	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
748	    NO_FOLLOW, NULL, &vp)) != 0)
749		return (error);
750
751	/* Now make sure mntpnt and dataset are ZFS */
752
753	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
754	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
755	    zc->zc_name) != 0)) {
756		VN_RELE(vp);
757		return (SET_ERROR(EPERM));
758	}
759
760	VN_RELE(vp);
761	return (dsl_deleg_access(zc->zc_name,
762	    ZFS_DELEG_PERM_SHARE, cr));
763}
764
765int
766zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
767{
768	if (!INGLOBALZONE(curthread))
769		return (SET_ERROR(EPERM));
770
771	if (secpolicy_nfs(cr) == 0) {
772		return (0);
773	} else {
774		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
775	}
776}
777
778int
779zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
780{
781	if (!INGLOBALZONE(curthread))
782		return (SET_ERROR(EPERM));
783
784	if (secpolicy_smb(cr) == 0) {
785		return (0);
786	} else {
787		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
788	}
789}
790
/*
 * Derive the parent of a dataset name by cutting off the trailing "@snap"
 * or "/child" component.
 *
 *	datasetname	name to take the parent of
 *	parent		output buffer; always NUL-terminated on return
 *	parentsize	size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT if the name contains neither '@' nor '/',
 * and EINVAL if parentsize is not positive.  A name that does not fit is
 * truncated to parentsize - 1 characters before the search.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() does not terminate the destination when the source is
	 * too long, so terminate explicitly before searching the copy.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
812
813int
814zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
815{
816	int error;
817
818	if ((error = zfs_secpolicy_write_perms(name,
819	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
820		return (error);
821
822	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
823}
824
825/* ARGSUSED */
826static int
827zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
828{
829	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
830}
831
832/*
833 * Destroying snapshots with delegated permissions requires
834 * descendant mount and destroy permissions.
835 */
836/* ARGSUSED */
837static int
838zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
839{
840	nvlist_t *snaps;
841	nvpair_t *pair, *nextpair;
842	int error = 0;
843
844	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
845		return (SET_ERROR(EINVAL));
846	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
847	    pair = nextpair) {
848		nextpair = nvlist_next_nvpair(snaps, pair);
849		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
850		if (error == ENOENT) {
851			/*
852			 * Ignore any snapshots that don't exist (we consider
853			 * them "already destroyed").  Remove the name from the
854			 * nvl here in case the snapshot is created between
855			 * now and when we try to destroy it (in which case
856			 * we don't want to destroy it since we haven't
857			 * checked for permission).
858			 */
859			fnvlist_remove_nvpair(snaps, pair);
860			error = 0;
861		}
862		if (error != 0)
863			break;
864	}
865
866	return (error);
867}
868
869int
870zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
871{
872	char	parentname[MAXNAMELEN];
873	int	error;
874
875	if ((error = zfs_secpolicy_write_perms(from,
876	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
877		return (error);
878
879	if ((error = zfs_secpolicy_write_perms(from,
880	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
881		return (error);
882
883	if ((error = zfs_get_parent(to, parentname,
884	    sizeof (parentname))) != 0)
885		return (error);
886
887	if ((error = zfs_secpolicy_write_perms(parentname,
888	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
889		return (error);
890
891	if ((error = zfs_secpolicy_write_perms(parentname,
892	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
893		return (error);
894
895	return (error);
896}
897
898/* ARGSUSED */
899static int
900zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
901{
902	char *at = NULL;
903	int error;
904
905	if ((zc->zc_cookie & 1) != 0) {
906		/*
907		 * This is recursive rename, so the starting snapshot might
908		 * not exist. Check file system or volume permission instead.
909		 */
910		at = strchr(zc->zc_name, '@');
911		if (at == NULL)
912			return (EINVAL);
913		*at = '\0';
914	}
915
916	error = zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr);
917
918	if (at != NULL)
919		*at = '@';
920
921	return (error);
922}
923
924/* ARGSUSED */
925static int
926zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
927{
928	dsl_pool_t *dp;
929	dsl_dataset_t *clone;
930	int error;
931
932	error = zfs_secpolicy_write_perms(zc->zc_name,
933	    ZFS_DELEG_PERM_PROMOTE, cr);
934	if (error != 0)
935		return (error);
936
937	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
938	if (error != 0)
939		return (error);
940
941	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
942
943	if (error == 0) {
944		char parentname[MAXNAMELEN];
945		dsl_dataset_t *origin = NULL;
946		dsl_dir_t *dd;
947		dd = clone->ds_dir;
948
949		error = dsl_dataset_hold_obj(dd->dd_pool,
950		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
951		if (error != 0) {
952			dsl_dataset_rele(clone, FTAG);
953			dsl_pool_rele(dp, FTAG);
954			return (error);
955		}
956
957		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
958		    ZFS_DELEG_PERM_MOUNT, cr);
959
960		dsl_dataset_name(origin, parentname);
961		if (error == 0) {
962			error = zfs_secpolicy_write_perms_ds(parentname, origin,
963			    ZFS_DELEG_PERM_PROMOTE, cr);
964		}
965		dsl_dataset_rele(clone, FTAG);
966		dsl_dataset_rele(origin, FTAG);
967	}
968	dsl_pool_rele(dp, FTAG);
969	return (error);
970}
971
972/* ARGSUSED */
973static int
974zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
975{
976	int error;
977
978	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
979	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
980		return (error);
981
982	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
983	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
984		return (error);
985
986	return (zfs_secpolicy_write_perms(zc->zc_name,
987	    ZFS_DELEG_PERM_CREATE, cr));
988}
989
990int
991zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
992{
993	return (zfs_secpolicy_write_perms(name,
994	    ZFS_DELEG_PERM_SNAPSHOT, cr));
995}
996
997/*
998 * Check for permission to create each snapshot in the nvlist.
999 */
1000/* ARGSUSED */
1001static int
1002zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1003{
1004	nvlist_t *snaps;
1005	int error;
1006	nvpair_t *pair;
1007
1008	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1009		return (SET_ERROR(EINVAL));
1010	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1011	    pair = nvlist_next_nvpair(snaps, pair)) {
1012		char *name = nvpair_name(pair);
1013		char *atp = strchr(name, '@');
1014
1015		if (atp == NULL) {
1016			error = SET_ERROR(EINVAL);
1017			break;
1018		}
1019		*atp = '\0';
1020		error = zfs_secpolicy_snapshot_perms(name, cr);
1021		*atp = '@';
1022		if (error != 0)
1023			break;
1024	}
1025	return (error);
1026}
1027
1028/*
1029 * Check for permission to create each snapshot in the nvlist.
1030 */
1031/* ARGSUSED */
1032static int
1033zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1034{
1035	int error = 0;
1036
1037	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1038	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1039		char *name = nvpair_name(pair);
1040		char *hashp = strchr(name, '#');
1041
1042		if (hashp == NULL) {
1043			error = SET_ERROR(EINVAL);
1044			break;
1045		}
1046		*hashp = '\0';
1047		error = zfs_secpolicy_write_perms(name,
1048		    ZFS_DELEG_PERM_BOOKMARK, cr);
1049		*hashp = '#';
1050		if (error != 0)
1051			break;
1052	}
1053	return (error);
1054}
1055
1056/* ARGSUSED */
1057static int
1058zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1059{
1060	nvpair_t *pair, *nextpair;
1061	int error = 0;
1062
1063	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1064	    pair = nextpair) {
1065		char *name = nvpair_name(pair);
1066		char *hashp = strchr(name, '#');
1067		nextpair = nvlist_next_nvpair(innvl, pair);
1068
1069		if (hashp == NULL) {
1070			error = SET_ERROR(EINVAL);
1071			break;
1072		}
1073
1074		*hashp = '\0';
1075		error = zfs_secpolicy_write_perms(name,
1076		    ZFS_DELEG_PERM_DESTROY, cr);
1077		*hashp = '#';
1078		if (error == ENOENT) {
1079			/*
1080			 * Ignore any filesystems that don't exist (we consider
1081			 * their bookmarks "already destroyed").  Remove
1082			 * the name from the nvl here in case the filesystem
1083			 * is created between now and when we try to destroy
1084			 * the bookmark (in which case we don't want to
1085			 * destroy it since we haven't checked for permission).
1086			 */
1087			fnvlist_remove_nvpair(innvl, pair);
1088			error = 0;
1089		}
1090		if (error != 0)
1091			break;
1092	}
1093
1094	return (error);
1095}
1096
1097/* ARGSUSED */
1098static int
1099zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1100{
1101	/*
1102	 * Even root must have a proper TSD so that we know what pool
1103	 * to log to.
1104	 */
1105	if (tsd_get(zfs_allow_log_key) == NULL)
1106		return (SET_ERROR(EPERM));
1107	return (0);
1108}
1109
1110static int
1111zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1112{
1113	char	parentname[MAXNAMELEN];
1114	int	error;
1115	char	*origin;
1116
1117	if ((error = zfs_get_parent(zc->zc_name, parentname,
1118	    sizeof (parentname))) != 0)
1119		return (error);
1120
1121	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1122	    (error = zfs_secpolicy_write_perms(origin,
1123	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1124		return (error);
1125
1126	if ((error = zfs_secpolicy_write_perms(parentname,
1127	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1128		return (error);
1129
1130	return (zfs_secpolicy_write_perms(parentname,
1131	    ZFS_DELEG_PERM_MOUNT, cr));
1132}
1133
1134/*
1135 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1136 * SYS_CONFIG privilege, which is not available in a local zone.
1137 */
1138/* ARGSUSED */
1139static int
1140zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1141{
1142	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1143		return (SET_ERROR(EPERM));
1144
1145	return (0);
1146}
1147
1148/*
1149 * Policy for object to name lookups.
1150 */
1151/* ARGSUSED */
1152static int
1153zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1154{
1155	int error;
1156
1157	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1158		return (0);
1159
1160	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1161	return (error);
1162}
1163
1164/*
1165 * Policy for fault injection.  Requires all privileges.
1166 */
1167/* ARGSUSED */
1168static int
1169zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1170{
1171	return (secpolicy_zinject(cr));
1172}
1173
1174/* ARGSUSED */
1175static int
1176zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1177{
1178	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1179
1180	if (prop == ZPROP_INVAL) {
1181		if (!zfs_prop_user(zc->zc_value))
1182			return (SET_ERROR(EINVAL));
1183		return (zfs_secpolicy_write_perms(zc->zc_name,
1184		    ZFS_DELEG_PERM_USERPROP, cr));
1185	} else {
1186		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1187		    NULL, cr));
1188	}
1189}
1190
1191static int
1192zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1193{
1194	int err = zfs_secpolicy_read(zc, innvl, cr);
1195	if (err)
1196		return (err);
1197
1198	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1199		return (SET_ERROR(EINVAL));
1200
1201	if (zc->zc_value[0] == 0) {
1202		/*
1203		 * They are asking about a posix uid/gid.  If it's
1204		 * themself, allow it.
1205		 */
1206		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1207		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1208			if (zc->zc_guid == crgetuid(cr))
1209				return (0);
1210		} else {
1211			if (groupmember(zc->zc_guid, cr))
1212				return (0);
1213		}
1214	}
1215
1216	return (zfs_secpolicy_write_perms(zc->zc_name,
1217	    userquota_perms[zc->zc_objset_type], cr));
1218}
1219
1220static int
1221zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1222{
1223	int err = zfs_secpolicy_read(zc, innvl, cr);
1224	if (err)
1225		return (err);
1226
1227	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1228		return (SET_ERROR(EINVAL));
1229
1230	return (zfs_secpolicy_write_perms(zc->zc_name,
1231	    userquota_perms[zc->zc_objset_type], cr));
1232}
1233
1234/* ARGSUSED */
1235static int
1236zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1237{
1238	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1239	    NULL, cr));
1240}
1241
1242/* ARGSUSED */
1243static int
1244zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1245{
1246	nvpair_t *pair;
1247	nvlist_t *holds;
1248	int error;
1249
1250	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1251	if (error != 0)
1252		return (SET_ERROR(EINVAL));
1253
1254	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1255	    pair = nvlist_next_nvpair(holds, pair)) {
1256		char fsname[MAXNAMELEN];
1257		error = dmu_fsname(nvpair_name(pair), fsname);
1258		if (error != 0)
1259			return (error);
1260		error = zfs_secpolicy_write_perms(fsname,
1261		    ZFS_DELEG_PERM_HOLD, cr);
1262		if (error != 0)
1263			return (error);
1264	}
1265	return (0);
1266}
1267
1268/* ARGSUSED */
1269static int
1270zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1271{
1272	nvpair_t *pair;
1273	int error;
1274
1275	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1276	    pair = nvlist_next_nvpair(innvl, pair)) {
1277		char fsname[MAXNAMELEN];
1278		error = dmu_fsname(nvpair_name(pair), fsname);
1279		if (error != 0)
1280			return (error);
1281		error = zfs_secpolicy_write_perms(fsname,
1282		    ZFS_DELEG_PERM_RELEASE, cr);
1283		if (error != 0)
1284			return (error);
1285	}
1286	return (0);
1287}
1288
1289/*
1290 * Policy for allowing temporary snapshots to be taken or released
1291 */
1292static int
1293zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1294{
1295	/*
1296	 * A temporary snapshot is the same as a snapshot,
1297	 * hold, destroy and release all rolled into one.
1298	 * Delegated diff alone is sufficient that we allow this.
1299	 */
1300	int error;
1301
1302	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1303	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1304		return (0);
1305
1306	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1307	if (error == 0)
1308		error = zfs_secpolicy_hold(zc, innvl, cr);
1309	if (error == 0)
1310		error = zfs_secpolicy_release(zc, innvl, cr);
1311	if (error == 0)
1312		error = zfs_secpolicy_destroy(zc, innvl, cr);
1313	return (error);
1314}
1315
1316/*
1317 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1318 */
1319static int
1320get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1321{
1322	char *packed;
1323	int error;
1324	nvlist_t *list = NULL;
1325
1326	/*
1327	 * Read in and unpack the user-supplied nvlist.
1328	 */
1329	if (size == 0)
1330		return (SET_ERROR(EINVAL));
1331
1332	packed = kmem_alloc(size, KM_SLEEP);
1333
1334	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1335	    iflag)) != 0) {
1336		kmem_free(packed, size);
1337		return (error);
1338	}
1339
1340	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1341		kmem_free(packed, size);
1342		return (error);
1343	}
1344
1345	kmem_free(packed, size);
1346
1347	*nvp = list;
1348	return (0);
1349}
1350
1351/*
1352 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1353 * Entries will be removed from the end of the nvlist, and one int32 entry
1354 * named "N_MORE_ERRORS" will be added indicating how many entries were
1355 * removed.
1356 */
1357static int
1358nvlist_smush(nvlist_t *errors, size_t max)
1359{
1360	size_t size;
1361
1362	size = fnvlist_size(errors);
1363
1364	if (size > max) {
1365		nvpair_t *more_errors;
1366		int n = 0;
1367
1368		if (max < 1024)
1369			return (SET_ERROR(ENOMEM));
1370
1371		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1372		more_errors = nvlist_prev_nvpair(errors, NULL);
1373
1374		do {
1375			nvpair_t *pair = nvlist_prev_nvpair(errors,
1376			    more_errors);
1377			fnvlist_remove_nvpair(errors, pair);
1378			n++;
1379			size = fnvlist_size(errors);
1380		} while (size > max);
1381
1382		fnvlist_remove_nvpair(errors, more_errors);
1383		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1384		ASSERT3U(fnvlist_size(errors), <=, max);
1385	}
1386
1387	return (0);
1388}
1389
1390static int
1391put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1392{
1393	char *packed = NULL;
1394	int error = 0;
1395	size_t size;
1396
1397	size = fnvlist_size(nvl);
1398
1399	if (size > zc->zc_nvlist_dst_size) {
1400		/*
1401		 * Solaris returns ENOMEM here, because even if an error is
1402		 * returned from an ioctl(2), new zc_nvlist_dst_size will be
1403		 * passed to the userland. This is not the case for FreeBSD.
1404		 * We need to return 0, so the kernel will copy the
1405		 * zc_nvlist_dst_size back and the userland can discover that a
1406		 * bigger buffer is needed.
1407		 */
1408		error = 0;
1409	} else {
1410		packed = fnvlist_pack(nvl, &size);
1411		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1412		    size, zc->zc_iflags) != 0)
1413			error = SET_ERROR(EFAULT);
1414		fnvlist_pack_free(packed, size);
1415	}
1416
1417	zc->zc_nvlist_dst_size = size;
1418	zc->zc_nvlist_dst_filled = B_TRUE;
1419	return (error);
1420}
1421
1422static int
1423getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1424{
1425	objset_t *os;
1426	int error;
1427
1428	error = dmu_objset_hold(dsname, FTAG, &os);
1429	if (error != 0)
1430		return (error);
1431	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1432		dmu_objset_rele(os, FTAG);
1433		return (SET_ERROR(EINVAL));
1434	}
1435
1436	mutex_enter(&os->os_user_ptr_lock);
1437	*zfvp = dmu_objset_get_user(os);
1438	if (*zfvp) {
1439		VFS_HOLD((*zfvp)->z_vfs);
1440	} else {
1441		error = SET_ERROR(ESRCH);
1442	}
1443	mutex_exit(&os->os_user_ptr_lock);
1444	dmu_objset_rele(os, FTAG);
1445	return (error);
1446}
1447
1448/*
1449 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1450 * case its z_vfs will be NULL, and it will be opened as the owner.
1451 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1452 * which prevents all vnode ops from running.
1453 */
1454static int
1455zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1456{
1457	int error = 0;
1458
1459	if (getzfsvfs(name, zfvp) != 0)
1460		error = zfsvfs_create(name, zfvp);
1461	if (error == 0) {
1462		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1463		    RW_READER, tag);
1464		if ((*zfvp)->z_unmounted) {
1465			/*
1466			 * XXX we could probably try again, since the unmounting
1467			 * thread should be just about to disassociate the
1468			 * objset from the zfsvfs.
1469			 */
1470			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1471			return (SET_ERROR(EBUSY));
1472		}
1473	}
1474	return (error);
1475}
1476
1477static void
1478zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1479{
1480	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1481
1482	if (zfsvfs->z_vfs) {
1483		VFS_RELE(zfsvfs->z_vfs);
1484	} else {
1485		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1486		zfsvfs_free(zfsvfs);
1487	}
1488}
1489
1490static int
1491zfs_ioc_pool_create(zfs_cmd_t *zc)
1492{
1493	int error;
1494	nvlist_t *config, *props = NULL;
1495	nvlist_t *rootprops = NULL;
1496	nvlist_t *zplprops = NULL;
1497
1498	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1499	    zc->zc_iflags, &config))
1500		return (error);
1501
1502	if (zc->zc_nvlist_src_size != 0 && (error =
1503	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1504	    zc->zc_iflags, &props))) {
1505		nvlist_free(config);
1506		return (error);
1507	}
1508
1509	if (props) {
1510		nvlist_t *nvl = NULL;
1511		uint64_t version = SPA_VERSION;
1512
1513		(void) nvlist_lookup_uint64(props,
1514		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1515		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1516			error = SET_ERROR(EINVAL);
1517			goto pool_props_bad;
1518		}
1519		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1520		if (nvl) {
1521			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1522			if (error != 0) {
1523				nvlist_free(config);
1524				nvlist_free(props);
1525				return (error);
1526			}
1527			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1528		}
1529		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1530		error = zfs_fill_zplprops_root(version, rootprops,
1531		    zplprops, NULL);
1532		if (error != 0)
1533			goto pool_props_bad;
1534	}
1535
1536	error = spa_create(zc->zc_name, config, props, zplprops);
1537
1538	/*
1539	 * Set the remaining root properties
1540	 */
1541	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1542	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1543		(void) spa_destroy(zc->zc_name);
1544
1545pool_props_bad:
1546	nvlist_free(rootprops);
1547	nvlist_free(zplprops);
1548	nvlist_free(config);
1549	nvlist_free(props);
1550
1551	return (error);
1552}
1553
1554static int
1555zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1556{
1557	int error;
1558	zfs_log_history(zc);
1559	error = spa_destroy(zc->zc_name);
1560	if (error == 0)
1561		zvol_remove_minors(zc->zc_name);
1562	return (error);
1563}
1564
1565static int
1566zfs_ioc_pool_import(zfs_cmd_t *zc)
1567{
1568	nvlist_t *config, *props = NULL;
1569	uint64_t guid;
1570	int error;
1571
1572	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1573	    zc->zc_iflags, &config)) != 0)
1574		return (error);
1575
1576	if (zc->zc_nvlist_src_size != 0 && (error =
1577	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1578	    zc->zc_iflags, &props))) {
1579		nvlist_free(config);
1580		return (error);
1581	}
1582
1583	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1584	    guid != zc->zc_guid)
1585		error = SET_ERROR(EINVAL);
1586	else
1587		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1588
1589	if (zc->zc_nvlist_dst != 0) {
1590		int err;
1591
1592		if ((err = put_nvlist(zc, config)) != 0)
1593			error = err;
1594	}
1595
1596	nvlist_free(config);
1597
1598	if (props)
1599		nvlist_free(props);
1600
1601	return (error);
1602}
1603
1604static int
1605zfs_ioc_pool_export(zfs_cmd_t *zc)
1606{
1607	int error;
1608	boolean_t force = (boolean_t)zc->zc_cookie;
1609	boolean_t hardforce = (boolean_t)zc->zc_guid;
1610
1611	zfs_log_history(zc);
1612	error = spa_export(zc->zc_name, NULL, force, hardforce);
1613	if (error == 0)
1614		zvol_remove_minors(zc->zc_name);
1615	return (error);
1616}
1617
1618static int
1619zfs_ioc_pool_configs(zfs_cmd_t *zc)
1620{
1621	nvlist_t *configs;
1622	int error;
1623
1624	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1625		return (SET_ERROR(EEXIST));
1626
1627	error = put_nvlist(zc, configs);
1628
1629	nvlist_free(configs);
1630
1631	return (error);
1632}
1633
1634/*
1635 * inputs:
1636 * zc_name		name of the pool
1637 *
1638 * outputs:
1639 * zc_cookie		real errno
1640 * zc_nvlist_dst	config nvlist
1641 * zc_nvlist_dst_size	size of config nvlist
1642 */
1643static int
1644zfs_ioc_pool_stats(zfs_cmd_t *zc)
1645{
1646	nvlist_t *config;
1647	int error;
1648	int ret = 0;
1649
1650	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1651	    sizeof (zc->zc_value));
1652
1653	if (config != NULL) {
1654		ret = put_nvlist(zc, config);
1655		nvlist_free(config);
1656
1657		/*
1658		 * The config may be present even if 'error' is non-zero.
1659		 * In this case we return success, and preserve the real errno
1660		 * in 'zc_cookie'.
1661		 */
1662		zc->zc_cookie = error;
1663	} else {
1664		ret = error;
1665	}
1666
1667	return (ret);
1668}
1669
1670/*
1671 * Try to import the given pool, returning pool stats as appropriate so that
1672 * user land knows which devices are available and overall pool health.
1673 */
1674static int
1675zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1676{
1677	nvlist_t *tryconfig, *config;
1678	int error;
1679
1680	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1681	    zc->zc_iflags, &tryconfig)) != 0)
1682		return (error);
1683
1684	config = spa_tryimport(tryconfig);
1685
1686	nvlist_free(tryconfig);
1687
1688	if (config == NULL)
1689		return (SET_ERROR(EINVAL));
1690
1691	error = put_nvlist(zc, config);
1692	nvlist_free(config);
1693
1694	return (error);
1695}
1696
1697/*
1698 * inputs:
1699 * zc_name              name of the pool
1700 * zc_cookie            scan func (pool_scan_func_t)
1701 */
1702static int
1703zfs_ioc_pool_scan(zfs_cmd_t *zc)
1704{
1705	spa_t *spa;
1706	int error;
1707
1708	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1709		return (error);
1710
1711	if (zc->zc_cookie == POOL_SCAN_NONE)
1712		error = spa_scan_stop(spa);
1713	else
1714		error = spa_scan(spa, zc->zc_cookie);
1715
1716	spa_close(spa, FTAG);
1717
1718	return (error);
1719}
1720
1721static int
1722zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1723{
1724	spa_t *spa;
1725	int error;
1726
1727	error = spa_open(zc->zc_name, &spa, FTAG);
1728	if (error == 0) {
1729		spa_freeze(spa);
1730		spa_close(spa, FTAG);
1731	}
1732	return (error);
1733}
1734
1735static int
1736zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1737{
1738	spa_t *spa;
1739	int error;
1740
1741	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1742		return (error);
1743
1744	if (zc->zc_cookie < spa_version(spa) ||
1745	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1746		spa_close(spa, FTAG);
1747		return (SET_ERROR(EINVAL));
1748	}
1749
1750	spa_upgrade(spa, zc->zc_cookie);
1751	spa_close(spa, FTAG);
1752
1753	return (error);
1754}
1755
1756static int
1757zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1758{
1759	spa_t *spa;
1760	char *hist_buf;
1761	uint64_t size;
1762	int error;
1763
1764	if ((size = zc->zc_history_len) == 0)
1765		return (SET_ERROR(EINVAL));
1766
1767	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1768		return (error);
1769
1770	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1771		spa_close(spa, FTAG);
1772		return (SET_ERROR(ENOTSUP));
1773	}
1774
1775	hist_buf = kmem_alloc(size, KM_SLEEP);
1776	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1777	    &zc->zc_history_len, hist_buf)) == 0) {
1778		error = ddi_copyout(hist_buf,
1779		    (void *)(uintptr_t)zc->zc_history,
1780		    zc->zc_history_len, zc->zc_iflags);
1781	}
1782
1783	spa_close(spa, FTAG);
1784	kmem_free(hist_buf, size);
1785	return (error);
1786}
1787
1788static int
1789zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1790{
1791	spa_t *spa;
1792	int error;
1793
1794	error = spa_open(zc->zc_name, &spa, FTAG);
1795	if (error == 0) {
1796		error = spa_change_guid(spa);
1797		spa_close(spa, FTAG);
1798	}
1799	return (error);
1800}
1801
1802static int
1803zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1804{
1805	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1806}
1807
1808/*
1809 * inputs:
1810 * zc_name		name of filesystem
1811 * zc_obj		object to find
1812 *
1813 * outputs:
1814 * zc_value		name of object
1815 */
1816static int
1817zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1818{
1819	objset_t *os;
1820	int error;
1821
1822	/* XXX reading from objset not owned */
1823	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1824		return (error);
1825	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1826		dmu_objset_rele(os, FTAG);
1827		return (SET_ERROR(EINVAL));
1828	}
1829	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1830	    sizeof (zc->zc_value));
1831	dmu_objset_rele(os, FTAG);
1832
1833	return (error);
1834}
1835
1836/*
1837 * inputs:
1838 * zc_name		name of filesystem
1839 * zc_obj		object to find
1840 *
1841 * outputs:
1842 * zc_stat		stats on object
1843 * zc_value		path to object
1844 */
1845static int
1846zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1847{
1848	objset_t *os;
1849	int error;
1850
1851	/* XXX reading from objset not owned */
1852	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1853		return (error);
1854	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1855		dmu_objset_rele(os, FTAG);
1856		return (SET_ERROR(EINVAL));
1857	}
1858	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1859	    sizeof (zc->zc_value));
1860	dmu_objset_rele(os, FTAG);
1861
1862	return (error);
1863}
1864
1865static int
1866zfs_ioc_vdev_add(zfs_cmd_t *zc)
1867{
1868	spa_t *spa;
1869	int error;
1870	nvlist_t *config, **l2cache, **spares;
1871	uint_t nl2cache = 0, nspares = 0;
1872
1873	error = spa_open(zc->zc_name, &spa, FTAG);
1874	if (error != 0)
1875		return (error);
1876
1877	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1878	    zc->zc_iflags, &config);
1879	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1880	    &l2cache, &nl2cache);
1881
1882	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1883	    &spares, &nspares);
1884
1885#ifdef illumos
1886	/*
1887	 * A root pool with concatenated devices is not supported.
1888	 * Thus, can not add a device to a root pool.
1889	 *
1890	 * Intent log device can not be added to a rootpool because
1891	 * during mountroot, zil is replayed, a seperated log device
1892	 * can not be accessed during the mountroot time.
1893	 *
1894	 * l2cache and spare devices are ok to be added to a rootpool.
1895	 */
1896	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1897		nvlist_free(config);
1898		spa_close(spa, FTAG);
1899		return (SET_ERROR(EDOM));
1900	}
1901#endif /* illumos */
1902
1903	if (error == 0) {
1904		error = spa_vdev_add(spa, config);
1905		nvlist_free(config);
1906	}
1907	spa_close(spa, FTAG);
1908	return (error);
1909}
1910
1911/*
1912 * inputs:
1913 * zc_name		name of the pool
1914 * zc_nvlist_conf	nvlist of devices to remove
1915 * zc_cookie		to stop the remove?
1916 */
1917static int
1918zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1919{
1920	spa_t *spa;
1921	int error;
1922
1923	error = spa_open(zc->zc_name, &spa, FTAG);
1924	if (error != 0)
1925		return (error);
1926	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1927	spa_close(spa, FTAG);
1928	return (error);
1929}
1930
1931static int
1932zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1933{
1934	spa_t *spa;
1935	int error;
1936	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1937
1938	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1939		return (error);
1940	switch (zc->zc_cookie) {
1941	case VDEV_STATE_ONLINE:
1942		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1943		break;
1944
1945	case VDEV_STATE_OFFLINE:
1946		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1947		break;
1948
1949	case VDEV_STATE_FAULTED:
1950		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1951		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1952			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1953
1954		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1955		break;
1956
1957	case VDEV_STATE_DEGRADED:
1958		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1959		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1960			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1961
1962		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1963		break;
1964
1965	default:
1966		error = SET_ERROR(EINVAL);
1967	}
1968	zc->zc_cookie = newstate;
1969	spa_close(spa, FTAG);
1970	return (error);
1971}
1972
1973static int
1974zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1975{
1976	spa_t *spa;
1977	int replacing = zc->zc_cookie;
1978	nvlist_t *config;
1979	int error;
1980
1981	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1982		return (error);
1983
1984	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1985	    zc->zc_iflags, &config)) == 0) {
1986		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1987		nvlist_free(config);
1988	}
1989
1990	spa_close(spa, FTAG);
1991	return (error);
1992}
1993
1994static int
1995zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1996{
1997	spa_t *spa;
1998	int error;
1999
2000	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2001		return (error);
2002
2003	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2004
2005	spa_close(spa, FTAG);
2006	return (error);
2007}
2008
2009static int
2010zfs_ioc_vdev_split(zfs_cmd_t *zc)
2011{
2012	spa_t *spa;
2013	nvlist_t *config, *props = NULL;
2014	int error;
2015	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2016
2017	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2018		return (error);
2019
2020	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2021	    zc->zc_iflags, &config)) {
2022		spa_close(spa, FTAG);
2023		return (error);
2024	}
2025
2026	if (zc->zc_nvlist_src_size != 0 && (error =
2027	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2028	    zc->zc_iflags, &props))) {
2029		spa_close(spa, FTAG);
2030		nvlist_free(config);
2031		return (error);
2032	}
2033
2034	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2035
2036	spa_close(spa, FTAG);
2037
2038	nvlist_free(config);
2039	nvlist_free(props);
2040
2041	return (error);
2042}
2043
2044static int
2045zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2046{
2047	spa_t *spa;
2048	char *path = zc->zc_value;
2049	uint64_t guid = zc->zc_guid;
2050	int error;
2051
2052	error = spa_open(zc->zc_name, &spa, FTAG);
2053	if (error != 0)
2054		return (error);
2055
2056	error = spa_vdev_setpath(spa, guid, path);
2057	spa_close(spa, FTAG);
2058	return (error);
2059}
2060
2061static int
2062zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2063{
2064	spa_t *spa;
2065	char *fru = zc->zc_value;
2066	uint64_t guid = zc->zc_guid;
2067	int error;
2068
2069	error = spa_open(zc->zc_name, &spa, FTAG);
2070	if (error != 0)
2071		return (error);
2072
2073	error = spa_vdev_setfru(spa, guid, fru);
2074	spa_close(spa, FTAG);
2075	return (error);
2076}
2077
2078static int
2079zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2080{
2081	int error = 0;
2082	nvlist_t *nv;
2083
2084	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2085
2086	if (zc->zc_nvlist_dst != 0 &&
2087	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2088		dmu_objset_stats(os, nv);
2089		/*
2090		 * NB: zvol_get_stats() will read the objset contents,
2091		 * which we aren't supposed to do with a
2092		 * DS_MODE_USER hold, because it could be
2093		 * inconsistent.  So this is a bit of a workaround...
2094		 * XXX reading with out owning
2095		 */
2096		if (!zc->zc_objset_stats.dds_inconsistent &&
2097		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2098			error = zvol_get_stats(os, nv);
2099			if (error == EIO)
2100				return (error);
2101			VERIFY0(error);
2102		}
2103		error = put_nvlist(zc, nv);
2104		nvlist_free(nv);
2105	}
2106
2107	return (error);
2108}
2109
2110/*
2111 * inputs:
2112 * zc_name		name of filesystem
2113 * zc_nvlist_dst_size	size of buffer for property nvlist
2114 *
2115 * outputs:
2116 * zc_objset_stats	stats
2117 * zc_nvlist_dst	property nvlist
2118 * zc_nvlist_dst_size	size of property nvlist
2119 */
2120static int
2121zfs_ioc_objset_stats(zfs_cmd_t *zc)
2122{
2123	objset_t *os;
2124	int error;
2125
2126	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2127	if (error == 0) {
2128		error = zfs_ioc_objset_stats_impl(zc, os);
2129		dmu_objset_rele(os, FTAG);
2130	}
2131
2132	if (error == ENOMEM)
2133		error = 0;
2134	return (error);
2135}
2136
2137/*
2138 * inputs:
2139 * zc_name		name of filesystem
2140 * zc_nvlist_dst_size	size of buffer for property nvlist
2141 *
2142 * outputs:
2143 * zc_nvlist_dst	received property nvlist
2144 * zc_nvlist_dst_size	size of received property nvlist
2145 *
2146 * Gets received properties (distinct from local properties on or after
2147 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2148 * local property values.
2149 */
2150static int
2151zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2152{
2153	int error = 0;
2154	nvlist_t *nv;
2155
2156	/*
2157	 * Without this check, we would return local property values if the
2158	 * caller has not already received properties on or after
2159	 * SPA_VERSION_RECVD_PROPS.
2160	 */
2161	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2162		return (SET_ERROR(ENOTSUP));
2163
2164	if (zc->zc_nvlist_dst != 0 &&
2165	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2166		error = put_nvlist(zc, nv);
2167		nvlist_free(nv);
2168	}
2169
2170	return (error);
2171}
2172
2173static int
2174nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2175{
2176	uint64_t value;
2177	int error;
2178
2179	/*
2180	 * zfs_get_zplprop() will either find a value or give us
2181	 * the default value (if there is one).
2182	 */
2183	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2184		return (error);
2185	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2186	return (0);
2187}
2188
2189/*
2190 * inputs:
2191 * zc_name		name of filesystem
2192 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2193 *
2194 * outputs:
2195 * zc_nvlist_dst	zpl property nvlist
2196 * zc_nvlist_dst_size	size of zpl property nvlist
2197 */
2198static int
2199zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2200{
2201	objset_t *os;
2202	int err;
2203
2204	/* XXX reading without owning */
2205	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2206		return (err);
2207
2208	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2209
2210	/*
2211	 * NB: nvl_add_zplprop() will read the objset contents,
2212	 * which we aren't supposed to do with a DS_MODE_USER
2213	 * hold, because it could be inconsistent.
2214	 */
2215	if (zc->zc_nvlist_dst != 0 &&
2216	    !zc->zc_objset_stats.dds_inconsistent &&
2217	    dmu_objset_type(os) == DMU_OST_ZFS) {
2218		nvlist_t *nv;
2219
2220		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2221		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2222		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2223		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2224		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2225			err = put_nvlist(zc, nv);
2226		nvlist_free(nv);
2227	} else {
2228		err = SET_ERROR(ENOENT);
2229	}
2230	dmu_objset_rele(os, FTAG);
2231	return (err);
2232}
2233
2234boolean_t
2235dataset_name_hidden(const char *name)
2236{
2237	/*
2238	 * Skip over datasets that are not visible in this zone,
2239	 * internal datasets (which have a $ in their name), and
2240	 * temporary datasets (which have a % in their name).
2241	 */
2242	if (strchr(name, '$') != NULL)
2243		return (B_TRUE);
2244	if (strchr(name, '%') != NULL)
2245		return (B_TRUE);
2246	if (!INGLOBALZONE(curthread) && !zone_dataset_visible(name, NULL))
2247		return (B_TRUE);
2248	return (B_FALSE);
2249}
2250
2251/*
2252 * inputs:
2253 * zc_name		name of filesystem
2254 * zc_cookie		zap cursor
2255 * zc_nvlist_dst_size	size of buffer for property nvlist
2256 *
2257 * outputs:
2258 * zc_name		name of next filesystem
2259 * zc_cookie		zap cursor
2260 * zc_objset_stats	stats
2261 * zc_nvlist_dst	property nvlist
2262 * zc_nvlist_dst_size	size of property nvlist
2263 */
2264static int
2265zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2266{
2267	objset_t *os;
2268	int error;
2269	char *p;
2270	size_t orig_len = strlen(zc->zc_name);
2271
2272top:
2273	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2274		if (error == ENOENT)
2275			error = SET_ERROR(ESRCH);
2276		return (error);
2277	}
2278
2279	p = strrchr(zc->zc_name, '/');
2280	if (p == NULL || p[1] != '\0')
2281		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2282	p = zc->zc_name + strlen(zc->zc_name);
2283
2284	do {
2285		error = dmu_dir_list_next(os,
2286		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2287		    NULL, &zc->zc_cookie);
2288		if (error == ENOENT)
2289			error = SET_ERROR(ESRCH);
2290	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2291	dmu_objset_rele(os, FTAG);
2292
2293	/*
2294	 * If it's an internal dataset (ie. with a '$' in its name),
2295	 * don't try to get stats for it, otherwise we'll return ENOENT.
2296	 */
2297	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2298		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2299		if (error == ENOENT) {
2300			/* We lost a race with destroy, get the next one. */
2301			zc->zc_name[orig_len] = '\0';
2302			goto top;
2303		}
2304	}
2305	return (error);
2306}
2307
2308/*
2309 * inputs:
2310 * zc_name		name of filesystem
2311 * zc_cookie		zap cursor
2312 * zc_nvlist_dst_size	size of buffer for property nvlist
2313 * zc_simple		when set, only name is requested
2314 *
2315 * outputs:
2316 * zc_name		name of next snapshot
2317 * zc_objset_stats	stats
2318 * zc_nvlist_dst	property nvlist
2319 * zc_nvlist_dst_size	size of property nvlist
2320 */
2321static int
2322zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2323{
2324	objset_t *os;
2325	int error;
2326
2327	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2328	if (error != 0) {
2329		return (error == ENOENT ? ESRCH : error);
2330	}
2331
2332	/*
2333	 * A dataset name of maximum length cannot have any snapshots,
2334	 * so exit immediately.
2335	 */
2336	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2337		dmu_objset_rele(os, FTAG);
2338		return (SET_ERROR(ESRCH));
2339	}
2340
2341	error = dmu_snapshot_list_next(os,
2342	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2343	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2344	    NULL);
2345
2346	if (error == 0 && !zc->zc_simple) {
2347		dsl_dataset_t *ds;
2348		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2349
2350		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2351		if (error == 0) {
2352			objset_t *ossnap;
2353
2354			error = dmu_objset_from_ds(ds, &ossnap);
2355			if (error == 0)
2356				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2357			dsl_dataset_rele(ds, FTAG);
2358		}
2359	} else if (error == ENOENT) {
2360		error = SET_ERROR(ESRCH);
2361	}
2362
2363	dmu_objset_rele(os, FTAG);
2364	/* if we failed, undo the @ that we tacked on to zc_name */
2365	if (error != 0)
2366		*strchr(zc->zc_name, '@') = '\0';
2367	return (error);
2368}
2369
2370static int
2371zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2372{
2373	const char *propname = nvpair_name(pair);
2374	uint64_t *valary;
2375	unsigned int vallen;
2376	const char *domain;
2377	char *dash;
2378	zfs_userquota_prop_t type;
2379	uint64_t rid;
2380	uint64_t quota;
2381	zfsvfs_t *zfsvfs;
2382	int err;
2383
2384	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2385		nvlist_t *attrs;
2386		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2387		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2388		    &pair) != 0)
2389			return (SET_ERROR(EINVAL));
2390	}
2391
2392	/*
2393	 * A correctly constructed propname is encoded as
2394	 * userquota@<rid>-<domain>.
2395	 */
2396	if ((dash = strchr(propname, '-')) == NULL ||
2397	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2398	    vallen != 3)
2399		return (SET_ERROR(EINVAL));
2400
2401	domain = dash + 1;
2402	type = valary[0];
2403	rid = valary[1];
2404	quota = valary[2];
2405
2406	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2407	if (err == 0) {
2408		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2409		zfsvfs_rele(zfsvfs, FTAG);
2410	}
2411
2412	return (err);
2413}
2414
2415/*
2416 * If the named property is one that has a special function to set its value,
2417 * return 0 on success and a positive error code on failure; otherwise if it is
2418 * not one of the special properties handled by this function, return -1.
2419 *
2420 * XXX: It would be better for callers of the property interface if we handled
2421 * these special cases in dsl_prop.c (in the dsl layer).
2422 */
2423static int
2424zfs_prop_set_special(const char *dsname, zprop_source_t source,
2425    nvpair_t *pair)
2426{
2427	const char *propname = nvpair_name(pair);
2428	zfs_prop_t prop = zfs_name_to_prop(propname);
2429	uint64_t intval;
2430	int err = -1;
2431
2432	if (prop == ZPROP_INVAL) {
2433		if (zfs_prop_userquota(propname))
2434			return (zfs_prop_set_userquota(dsname, pair));
2435		return (-1);
2436	}
2437
2438	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2439		nvlist_t *attrs;
2440		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2441		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2442		    &pair) == 0);
2443	}
2444
2445	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2446		return (-1);
2447
2448	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2449
2450	switch (prop) {
2451	case ZFS_PROP_QUOTA:
2452		err = dsl_dir_set_quota(dsname, source, intval);
2453		break;
2454	case ZFS_PROP_REFQUOTA:
2455		err = dsl_dataset_set_refquota(dsname, source, intval);
2456		break;
2457	case ZFS_PROP_FILESYSTEM_LIMIT:
2458	case ZFS_PROP_SNAPSHOT_LIMIT:
2459		if (intval == UINT64_MAX) {
2460			/* clearing the limit, just do it */
2461			err = 0;
2462		} else {
2463			err = dsl_dir_activate_fs_ss_limit(dsname);
2464		}
2465		/*
2466		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2467		 * default path to set the value in the nvlist.
2468		 */
2469		if (err == 0)
2470			err = -1;
2471		break;
2472	case ZFS_PROP_RESERVATION:
2473		err = dsl_dir_set_reservation(dsname, source, intval);
2474		break;
2475	case ZFS_PROP_REFRESERVATION:
2476		err = dsl_dataset_set_refreservation(dsname, source, intval);
2477		break;
2478	case ZFS_PROP_VOLSIZE:
2479		err = zvol_set_volsize(dsname, intval);
2480		break;
2481	case ZFS_PROP_VERSION:
2482	{
2483		zfsvfs_t *zfsvfs;
2484
2485		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2486			break;
2487
2488		err = zfs_set_version(zfsvfs, intval);
2489		zfsvfs_rele(zfsvfs, FTAG);
2490
2491		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2492			zfs_cmd_t *zc;
2493
2494			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2495			(void) strcpy(zc->zc_name, dsname);
2496			(void) zfs_ioc_userspace_upgrade(zc);
2497			kmem_free(zc, sizeof (zfs_cmd_t));
2498		}
2499		break;
2500	}
2501	default:
2502		err = -1;
2503	}
2504
2505	return (err);
2506}
2507
2508/*
2509 * This function is best effort. If it fails to set any of the given properties,
2510 * it continues to set as many as it can and returns the last error
2511 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2512 * with the list of names of all the properties that failed along with the
2513 * corresponding error numbers.
2514 *
2515 * If every property is set successfully, zero is returned and errlist is not
2516 * modified.
2517 */
int
zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
    nvlist_t *errlist)
{
	nvpair_t *pair;
	nvpair_t *propval;
	int rv = 0;
	uint64_t intval;
	char *strval;
	/* Properties with no special setter; applied together at the end. */
	nvlist_t *genericnvl = fnvlist_alloc();
	/* Special properties that failed on the first pass. */
	nvlist_t *retrynvl = fnvlist_alloc();

	/*
	 * The loop below runs at most twice: once over the caller's list and,
	 * if anything was queued for retry, once more over retrynvl (nvl is
	 * repointed at retrynvl before jumping back here).
	 */
retry:
	pair = NULL;
	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
		const char *propname = nvpair_name(pair);
		zfs_prop_t prop = zfs_name_to_prop(propname);
		int err = 0;

		/* decode the property value */
		propval = pair;
		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
			nvlist_t *attrs;
			attrs = fnvpair_value_nvlist(pair);
			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
			    &propval) != 0)
				err = SET_ERROR(EINVAL);
		}

		/* Validate value type */
		if (err == 0 && prop == ZPROP_INVAL) {
			/*
			 * Non-native name: user properties must be strings,
			 * userquota-style properties must be uint64 arrays,
			 * anything else is rejected.
			 */
			if (zfs_prop_user(propname)) {
				if (nvpair_type(propval) != DATA_TYPE_STRING)
					err = SET_ERROR(EINVAL);
			} else if (zfs_prop_userquota(propname)) {
				if (nvpair_type(propval) !=
				    DATA_TYPE_UINT64_ARRAY)
					err = SET_ERROR(EINVAL);
			} else {
				err = SET_ERROR(EINVAL);
			}
		} else if (err == 0) {
			/*
			 * Native property: the nvpair type must agree with
			 * the property's declared type.
			 */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
					err = SET_ERROR(EINVAL);
			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
				const char *unused;

				intval = fnvpair_value_uint64(propval);

				switch (zfs_prop_get_type(prop)) {
				case PROP_TYPE_NUMBER:
					break;
				case PROP_TYPE_STRING:
					err = SET_ERROR(EINVAL);
					break;
				case PROP_TYPE_INDEX:
					/* The index must map to a name. */
					if (zfs_prop_index_to_string(prop,
					    intval, &unused) != 0)
						err = SET_ERROR(EINVAL);
					break;
				default:
					cmn_err(CE_PANIC,
					    "unknown property type");
				}
			} else {
				err = SET_ERROR(EINVAL);
			}
		}

		/* Validate permissions */
		if (err == 0)
			err = zfs_check_settable(dsname, pair, CRED());

		if (err == 0) {
			err = zfs_prop_set_special(dsname, source, pair);
			if (err == -1) {
				/*
				 * For better performance we build up a list of
				 * properties to set in a single transaction.
				 */
				err = nvlist_add_nvpair(genericnvl, pair);
			} else if (err != 0 && nvl != retrynvl) {
				/*
				 * This may be a spurious error caused by
				 * receiving quota and reservation out of order.
				 * Try again in a second pass.
				 */
				/*
				 * Note that err is overwritten here, so a
				 * successfully queued pair records no error
				 * for this pass.
				 */
				err = nvlist_add_nvpair(retrynvl, pair);
			}
		}

		if (err != 0) {
			if (errlist != NULL)
				fnvlist_add_int32(errlist, propname, err);
			rv = err;
		}
	}

	/* Second pass over the queued special properties, first time only. */
	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
		nvl = retrynvl;
		goto retry;
	}

	if (!nvlist_empty(genericnvl) &&
	    dsl_props_set(dsname, source, genericnvl) != 0) {
		/*
		 * If this fails, we still want to set as many properties as we
		 * can, so try setting them individually.
		 */
		pair = NULL;
		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
			const char *propname = nvpair_name(pair);
			int err = 0;

			propval = pair;
			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
				nvlist_t *attrs;
				attrs = fnvpair_value_nvlist(pair);
				propval = fnvlist_lookup_nvpair(attrs,
				    ZPROP_VALUE);
			}

			/* Anything that is not a string is set as a uint64. */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				strval = fnvpair_value_string(propval);
				err = dsl_prop_set_string(dsname, propname,
				    source, strval);
			} else {
				intval = fnvpair_value_uint64(propval);
				err = dsl_prop_set_int(dsname, propname, source,
				    intval);
			}

			if (err != 0) {
				if (errlist != NULL) {
					fnvlist_add_int32(errlist, propname,
					    err);
				}
				rv = err;
			}
		}
	}
	nvlist_free(genericnvl);
	nvlist_free(retrynvl);

	/* rv holds the last error recorded, or 0 if none was. */
	return (rv);
}
2665
2666/*
2667 * Check that all the properties are valid user properties.
2668 */
2669static int
2670zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2671{
2672	nvpair_t *pair = NULL;
2673	int error = 0;
2674
2675	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2676		const char *propname = nvpair_name(pair);
2677
2678		if (!zfs_prop_user(propname) ||
2679		    nvpair_type(pair) != DATA_TYPE_STRING)
2680			return (SET_ERROR(EINVAL));
2681
2682		if (error = zfs_secpolicy_write_perms(fsname,
2683		    ZFS_DELEG_PERM_USERPROP, CRED()))
2684			return (error);
2685
2686		if (strlen(propname) >= ZAP_MAXNAMELEN)
2687			return (SET_ERROR(ENAMETOOLONG));
2688
2689		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2690			return (E2BIG);
2691	}
2692	return (0);
2693}
2694
2695static void
2696props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2697{
2698	nvpair_t *pair;
2699
2700	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2701
2702	pair = NULL;
2703	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2704		if (nvlist_exists(skipped, nvpair_name(pair)))
2705			continue;
2706
2707		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2708	}
2709}
2710
2711static int
2712clear_received_props(const char *dsname, nvlist_t *props,
2713    nvlist_t *skipped)
2714{
2715	int err = 0;
2716	nvlist_t *cleared_props = NULL;
2717	props_skip(props, skipped, &cleared_props);
2718	if (!nvlist_empty(cleared_props)) {
2719		/*
2720		 * Acts on local properties until the dataset has received
2721		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2722		 */
2723		zprop_source_t flags = (ZPROP_SRC_NONE |
2724		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2725		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2726	}
2727	nvlist_free(cleared_props);
2728	return (err);
2729}
2730
2731/*
2732 * inputs:
2733 * zc_name		name of filesystem
2734 * zc_value		name of property to set
2735 * zc_nvlist_src{_size}	nvlist of properties to apply
2736 * zc_cookie		received properties flag
2737 *
2738 * outputs:
2739 * zc_nvlist_dst{_size} error for each unapplied received property
2740 */
2741static int
2742zfs_ioc_set_prop(zfs_cmd_t *zc)
2743{
2744	nvlist_t *nvl;
2745	boolean_t received = zc->zc_cookie;
2746	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2747	    ZPROP_SRC_LOCAL);
2748	nvlist_t *errors;
2749	int error;
2750
2751	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2752	    zc->zc_iflags, &nvl)) != 0)
2753		return (error);
2754
2755	if (received) {
2756		nvlist_t *origprops;
2757
2758		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2759			(void) clear_received_props(zc->zc_name,
2760			    origprops, nvl);
2761			nvlist_free(origprops);
2762		}
2763
2764		error = dsl_prop_set_hasrecvd(zc->zc_name);
2765	}
2766
2767	errors = fnvlist_alloc();
2768	if (error == 0)
2769		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2770
2771	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2772		(void) put_nvlist(zc, errors);
2773	}
2774
2775	nvlist_free(errors);
2776	nvlist_free(nvl);
2777	return (error);
2778}
2779
2780/*
2781 * inputs:
2782 * zc_name		name of filesystem
2783 * zc_value		name of property to inherit
2784 * zc_cookie		revert to received value if TRUE
2785 *
2786 * outputs:		none
2787 */
2788static int
2789zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2790{
2791	const char *propname = zc->zc_value;
2792	zfs_prop_t prop = zfs_name_to_prop(propname);
2793	boolean_t received = zc->zc_cookie;
2794	zprop_source_t source = (received
2795	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2796	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2797
2798	if (received) {
2799		nvlist_t *dummy;
2800		nvpair_t *pair;
2801		zprop_type_t type;
2802		int err;
2803
2804		/*
2805		 * zfs_prop_set_special() expects properties in the form of an
2806		 * nvpair with type info.
2807		 */
2808		if (prop == ZPROP_INVAL) {
2809			if (!zfs_prop_user(propname))
2810				return (SET_ERROR(EINVAL));
2811
2812			type = PROP_TYPE_STRING;
2813		} else if (prop == ZFS_PROP_VOLSIZE ||
2814		    prop == ZFS_PROP_VERSION) {
2815			return (SET_ERROR(EINVAL));
2816		} else {
2817			type = zfs_prop_get_type(prop);
2818		}
2819
2820		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2821
2822		switch (type) {
2823		case PROP_TYPE_STRING:
2824			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2825			break;
2826		case PROP_TYPE_NUMBER:
2827		case PROP_TYPE_INDEX:
2828			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2829			break;
2830		default:
2831			nvlist_free(dummy);
2832			return (SET_ERROR(EINVAL));
2833		}
2834
2835		pair = nvlist_next_nvpair(dummy, NULL);
2836		err = zfs_prop_set_special(zc->zc_name, source, pair);
2837		nvlist_free(dummy);
2838		if (err != -1)
2839			return (err); /* special property already handled */
2840	} else {
2841		/*
2842		 * Only check this in the non-received case. We want to allow
2843		 * 'inherit -S' to revert non-inheritable properties like quota
2844		 * and reservation to the received or default values even though
2845		 * they are not considered inheritable.
2846		 */
2847		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2848			return (SET_ERROR(EINVAL));
2849	}
2850
2851	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2852	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2853}
2854
2855static int
2856zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2857{
2858	nvlist_t *props;
2859	spa_t *spa;
2860	int error;
2861	nvpair_t *pair;
2862
2863	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2864	    zc->zc_iflags, &props))
2865		return (error);
2866
2867	/*
2868	 * If the only property is the configfile, then just do a spa_lookup()
2869	 * to handle the faulted case.
2870	 */
2871	pair = nvlist_next_nvpair(props, NULL);
2872	if (pair != NULL && strcmp(nvpair_name(pair),
2873	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2874	    nvlist_next_nvpair(props, pair) == NULL) {
2875		mutex_enter(&spa_namespace_lock);
2876		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2877			spa_configfile_set(spa, props, B_FALSE);
2878			spa_config_sync(spa, B_FALSE, B_TRUE);
2879		}
2880		mutex_exit(&spa_namespace_lock);
2881		if (spa != NULL) {
2882			nvlist_free(props);
2883			return (0);
2884		}
2885	}
2886
2887	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2888		nvlist_free(props);
2889		return (error);
2890	}
2891
2892	error = spa_prop_set(spa, props);
2893
2894	nvlist_free(props);
2895	spa_close(spa, FTAG);
2896
2897	return (error);
2898}
2899
2900static int
2901zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2902{
2903	spa_t *spa;
2904	int error;
2905	nvlist_t *nvp = NULL;
2906
2907	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2908		/*
2909		 * If the pool is faulted, there may be properties we can still
2910		 * get (such as altroot and cachefile), so attempt to get them
2911		 * anyway.
2912		 */
2913		mutex_enter(&spa_namespace_lock);
2914		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2915			error = spa_prop_get(spa, &nvp);
2916		mutex_exit(&spa_namespace_lock);
2917	} else {
2918		error = spa_prop_get(spa, &nvp);
2919		spa_close(spa, FTAG);
2920	}
2921
2922	if (error == 0 && zc->zc_nvlist_dst != 0)
2923		error = put_nvlist(zc, nvp);
2924	else
2925		error = SET_ERROR(EFAULT);
2926
2927	nvlist_free(nvp);
2928	return (error);
2929}
2930
2931/*
2932 * inputs:
2933 * zc_name		name of filesystem
2934 * zc_nvlist_src{_size}	nvlist of delegated permissions
2935 * zc_perm_action	allow/unallow flag
2936 *
2937 * outputs:		none
2938 */
2939static int
2940zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2941{
2942	int error;
2943	nvlist_t *fsaclnv = NULL;
2944
2945	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2946	    zc->zc_iflags, &fsaclnv)) != 0)
2947		return (error);
2948
2949	/*
2950	 * Verify nvlist is constructed correctly
2951	 */
2952	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2953		nvlist_free(fsaclnv);
2954		return (SET_ERROR(EINVAL));
2955	}
2956
2957	/*
2958	 * If we don't have PRIV_SYS_MOUNT, then validate
2959	 * that user is allowed to hand out each permission in
2960	 * the nvlist(s)
2961	 */
2962
2963	error = secpolicy_zfs(CRED());
2964	if (error != 0) {
2965		if (zc->zc_perm_action == B_FALSE) {
2966			error = dsl_deleg_can_allow(zc->zc_name,
2967			    fsaclnv, CRED());
2968		} else {
2969			error = dsl_deleg_can_unallow(zc->zc_name,
2970			    fsaclnv, CRED());
2971		}
2972	}
2973
2974	if (error == 0)
2975		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2976
2977	nvlist_free(fsaclnv);
2978	return (error);
2979}
2980
2981/*
2982 * inputs:
2983 * zc_name		name of filesystem
2984 *
2985 * outputs:
 * zc_nvlist_dst{_size}	nvlist of delegated permissions
2987 */
2988static int
2989zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2990{
2991	nvlist_t *nvp;
2992	int error;
2993
2994	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2995		error = put_nvlist(zc, nvp);
2996		nvlist_free(nvp);
2997	}
2998
2999	return (error);
3000}
3001
3002/*
3003 * Search the vfs list for a specified resource.  Returns a pointer to it
3004 * or NULL if no suitable entry is found. The caller of this routine
3005 * is responsible for releasing the returned vfs pointer.
3006 */
3007static vfs_t *
3008zfs_get_vfs(const char *resource)
3009{
3010	vfs_t *vfsp;
3011
3012	mtx_lock(&mountlist_mtx);
3013	TAILQ_FOREACH(vfsp, &mountlist, mnt_list) {
3014		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
3015			VFS_HOLD(vfsp);
3016			break;
3017		}
3018	}
3019	mtx_unlock(&mountlist_mtx);
3020	return (vfsp);
3021}
3022
3023/* ARGSUSED */
3024static void
3025zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3026{
3027	zfs_creat_t *zct = arg;
3028
3029	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3030}
3031
/*
 * Sentinel meaning "the creator did not request a value" for the zpl
 * properties resolved in zfs_fill_zplprops_impl().
 */
#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3033
3034/*
3035 * inputs:
3036 * os			parent objset pointer (NULL if root fs)
3037 * fuids_ok		fuids allowed in this version of the spa?
3038 * sa_ok		SAs allowed in this version of the spa?
3039 * createprops		list of properties requested by creator
3040 *
3041 * outputs:
3042 * zplprops	values for the zplprops we attach to the master node object
3043 * is_ci	true if requested file system will be purely case-insensitive
3044 *
3045 * Determine the settings for utf8only, normalization and
3046 * casesensitivity.  Specific values may have been requested by the
3047 * creator and/or we can inherit values from the parent dataset.  If
3048 * the file system is of too early a vintage, a creator can not
3049 * request settings for these properties, even if the requested
3050 * setting is the default value.  We don't actually want to create dsl
3051 * properties for these, so remove them from the source nvlist after
3052 * processing.
3053 */
3054static int
3055zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3056    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3057    nvlist_t *zplprops, boolean_t *is_ci)
3058{
3059	uint64_t sense = ZFS_PROP_UNDEFINED;
3060	uint64_t norm = ZFS_PROP_UNDEFINED;
3061	uint64_t u8 = ZFS_PROP_UNDEFINED;
3062
3063	ASSERT(zplprops != NULL);
3064
3065	/*
3066	 * Pull out creator prop choices, if any.
3067	 */
3068	if (createprops) {
3069		(void) nvlist_lookup_uint64(createprops,
3070		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3071		(void) nvlist_lookup_uint64(createprops,
3072		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3073		(void) nvlist_remove_all(createprops,
3074		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3075		(void) nvlist_lookup_uint64(createprops,
3076		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3077		(void) nvlist_remove_all(createprops,
3078		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3079		(void) nvlist_lookup_uint64(createprops,
3080		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3081		(void) nvlist_remove_all(createprops,
3082		    zfs_prop_to_name(ZFS_PROP_CASE));
3083	}
3084
3085	/*
3086	 * If the zpl version requested is whacky or the file system
3087	 * or pool is version is too "young" to support normalization
3088	 * and the creator tried to set a value for one of the props,
3089	 * error out.
3090	 */
3091	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3092	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3093	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3094	    (zplver < ZPL_VERSION_NORMALIZATION &&
3095	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3096	    sense != ZFS_PROP_UNDEFINED)))
3097		return (SET_ERROR(ENOTSUP));
3098
3099	/*
3100	 * Put the version in the zplprops
3101	 */
3102	VERIFY(nvlist_add_uint64(zplprops,
3103	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3104
3105	if (norm == ZFS_PROP_UNDEFINED)
3106		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3107	VERIFY(nvlist_add_uint64(zplprops,
3108	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3109
3110	/*
3111	 * If we're normalizing, names must always be valid UTF-8 strings.
3112	 */
3113	if (norm)
3114		u8 = 1;
3115	if (u8 == ZFS_PROP_UNDEFINED)
3116		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3117	VERIFY(nvlist_add_uint64(zplprops,
3118	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3119
3120	if (sense == ZFS_PROP_UNDEFINED)
3121		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3122	VERIFY(nvlist_add_uint64(zplprops,
3123	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3124
3125	if (is_ci)
3126		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3127
3128	return (0);
3129}
3130
3131static int
3132zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3133    nvlist_t *zplprops, boolean_t *is_ci)
3134{
3135	boolean_t fuids_ok, sa_ok;
3136	uint64_t zplver = ZPL_VERSION;
3137	objset_t *os = NULL;
3138	char parentname[MAXNAMELEN];
3139	char *cp;
3140	spa_t *spa;
3141	uint64_t spa_vers;
3142	int error;
3143
3144	(void) strlcpy(parentname, dataset, sizeof (parentname));
3145	cp = strrchr(parentname, '/');
3146	ASSERT(cp != NULL);
3147	cp[0] = '\0';
3148
3149	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3150		return (error);
3151
3152	spa_vers = spa_version(spa);
3153	spa_close(spa, FTAG);
3154
3155	zplver = zfs_zpl_version_map(spa_vers);
3156	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3157	sa_ok = (zplver >= ZPL_VERSION_SA);
3158
3159	/*
3160	 * Open parent object set so we can inherit zplprop values.
3161	 */
3162	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3163		return (error);
3164
3165	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3166	    zplprops, is_ci);
3167	dmu_objset_rele(os, FTAG);
3168	return (error);
3169}
3170
3171static int
3172zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3173    nvlist_t *zplprops, boolean_t *is_ci)
3174{
3175	boolean_t fuids_ok;
3176	boolean_t sa_ok;
3177	uint64_t zplver = ZPL_VERSION;
3178	int error;
3179
3180	zplver = zfs_zpl_version_map(spa_vers);
3181	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3182	sa_ok = (zplver >= ZPL_VERSION_SA);
3183
3184	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3185	    createprops, zplprops, is_ci);
3186	return (error);
3187}
3188
3189/*
3190 * innvl: {
3191 *     "type" -> dmu_objset_type_t (int32)
3192 *     (optional) "props" -> { prop -> value }
3193 * }
3194 *
3195 * outnvl: propname -> error code (int32)
3196 */
3197static int
3198zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3199{
3200	int error = 0;
3201	zfs_creat_t zct = { 0 };
3202	nvlist_t *nvprops = NULL;
3203	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3204	int32_t type32;
3205	dmu_objset_type_t type;
3206	boolean_t is_insensitive = B_FALSE;
3207
3208	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3209		return (SET_ERROR(EINVAL));
3210	type = type32;
3211	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3212
3213	switch (type) {
3214	case DMU_OST_ZFS:
3215		cbfunc = zfs_create_cb;
3216		break;
3217
3218	case DMU_OST_ZVOL:
3219		cbfunc = zvol_create_cb;
3220		break;
3221
3222	default:
3223		cbfunc = NULL;
3224		break;
3225	}
3226	if (strchr(fsname, '@') ||
3227	    strchr(fsname, '%'))
3228		return (SET_ERROR(EINVAL));
3229
3230	zct.zct_props = nvprops;
3231
3232	if (cbfunc == NULL)
3233		return (SET_ERROR(EINVAL));
3234
3235	if (type == DMU_OST_ZVOL) {
3236		uint64_t volsize, volblocksize;
3237
3238		if (nvprops == NULL)
3239			return (SET_ERROR(EINVAL));
3240		if (nvlist_lookup_uint64(nvprops,
3241		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3242			return (SET_ERROR(EINVAL));
3243
3244		if ((error = nvlist_lookup_uint64(nvprops,
3245		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3246		    &volblocksize)) != 0 && error != ENOENT)
3247			return (SET_ERROR(EINVAL));
3248
3249		if (error != 0)
3250			volblocksize = zfs_prop_default_numeric(
3251			    ZFS_PROP_VOLBLOCKSIZE);
3252
3253		if ((error = zvol_check_volblocksize(
3254		    volblocksize)) != 0 ||
3255		    (error = zvol_check_volsize(volsize,
3256		    volblocksize)) != 0)
3257			return (error);
3258	} else if (type == DMU_OST_ZFS) {
3259		int error;
3260
3261		/*
3262		 * We have to have normalization and
3263		 * case-folding flags correct when we do the
3264		 * file system creation, so go figure them out
3265		 * now.
3266		 */
3267		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3268		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3269		error = zfs_fill_zplprops(fsname, nvprops,
3270		    zct.zct_zplprops, &is_insensitive);
3271		if (error != 0) {
3272			nvlist_free(zct.zct_zplprops);
3273			return (error);
3274		}
3275	}
3276
3277	error = dmu_objset_create(fsname, type,
3278	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3279	nvlist_free(zct.zct_zplprops);
3280
3281	/*
3282	 * It would be nice to do this atomically.
3283	 */
3284	if (error == 0) {
3285		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3286		    nvprops, outnvl);
3287		if (error != 0)
3288			(void) dsl_destroy_head(fsname);
3289	}
3290#ifdef __FreeBSD__
3291	if (error == 0 && type == DMU_OST_ZVOL)
3292		zvol_create_minors(fsname);
3293#endif
3294	return (error);
3295}
3296
3297/*
3298 * innvl: {
3299 *     "origin" -> name of origin snapshot
3300 *     (optional) "props" -> { prop -> value }
3301 * }
3302 *
3303 * outnvl: propname -> error code (int32)
3304 */
3305static int
3306zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3307{
3308	int error = 0;
3309	nvlist_t *nvprops = NULL;
3310	char *origin_name;
3311
3312	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3313		return (SET_ERROR(EINVAL));
3314	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3315
3316	if (strchr(fsname, '@') ||
3317	    strchr(fsname, '%'))
3318		return (SET_ERROR(EINVAL));
3319
3320	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3321		return (SET_ERROR(EINVAL));
3322	error = dmu_objset_clone(fsname, origin_name);
3323	if (error != 0)
3324		return (error);
3325
3326	/*
3327	 * It would be nice to do this atomically.
3328	 */
3329	if (error == 0) {
3330		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3331		    nvprops, outnvl);
3332		if (error != 0)
3333			(void) dsl_destroy_head(fsname);
3334	}
3335#ifdef __FreeBSD__
3336	if (error == 0)
3337		zvol_create_minors(fsname);
3338#endif
3339	return (error);
3340}
3341
3342/*
3343 * innvl: {
3344 *     "snaps" -> { snapshot1, snapshot2 }
3345 *     (optional) "props" -> { prop -> value (string) }
3346 * }
3347 *
3348 * outnvl: snapshot -> error code (int32)
3349 */
3350static int
3351zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3352{
3353	nvlist_t *snaps;
3354	nvlist_t *props = NULL;
3355	int error, poollen;
3356	nvpair_t *pair;
3357
3358	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3359	if ((error = zfs_check_userprops(poolname, props)) != 0)
3360		return (error);
3361
3362	if (!nvlist_empty(props) &&
3363	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3364		return (SET_ERROR(ENOTSUP));
3365
3366	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3367		return (SET_ERROR(EINVAL));
3368	poollen = strlen(poolname);
3369	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3370	    pair = nvlist_next_nvpair(snaps, pair)) {
3371		const char *name = nvpair_name(pair);
3372		const char *cp = strchr(name, '@');
3373
3374		/*
3375		 * The snap name must contain an @, and the part after it must
3376		 * contain only valid characters.
3377		 */
3378		if (cp == NULL ||
3379		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3380			return (SET_ERROR(EINVAL));
3381
3382		/*
3383		 * The snap must be in the specified pool.
3384		 */
3385		if (strncmp(name, poolname, poollen) != 0 ||
3386		    (name[poollen] != '/' && name[poollen] != '@'))
3387			return (SET_ERROR(EXDEV));
3388
3389		/* This must be the only snap of this fs. */
3390		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3391		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3392			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3393			    == 0) {
3394				return (SET_ERROR(EXDEV));
3395			}
3396		}
3397	}
3398
3399	error = dsl_dataset_snapshot(snaps, props, outnvl);
3400	return (error);
3401}
3402
3403/*
3404 * innvl: "message" -> string
3405 */
3406/* ARGSUSED */
3407static int
3408zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3409{
3410	char *message;
3411	spa_t *spa;
3412	int error;
3413	char *poolname;
3414
3415	/*
3416	 * The poolname in the ioctl is not set, we get it from the TSD,
3417	 * which was set at the end of the last successful ioctl that allows
3418	 * logging.  The secpolicy func already checked that it is set.
3419	 * Only one log ioctl is allowed after each successful ioctl, so
3420	 * we clear the TSD here.
3421	 */
3422	poolname = tsd_get(zfs_allow_log_key);
3423	(void) tsd_set(zfs_allow_log_key, NULL);
3424	error = spa_open(poolname, &spa, FTAG);
3425	strfree(poolname);
3426	if (error != 0)
3427		return (error);
3428
3429	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3430		spa_close(spa, FTAG);
3431		return (SET_ERROR(EINVAL));
3432	}
3433
3434	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3435		spa_close(spa, FTAG);
3436		return (SET_ERROR(ENOTSUP));
3437	}
3438
3439	error = spa_history_log(spa, message);
3440	spa_close(spa, FTAG);
3441	return (error);
3442}
3443
3444/*
3445 * The dp_config_rwlock must not be held when calling this, because the
3446 * unmount may need to write out data.
3447 *
3448 * This function is best-effort.  Callers must deal gracefully if it
3449 * remains mounted (or is remounted after this call).
3450 *
3451 * Returns 0 if the argument is not a snapshot, or it is not currently a
3452 * filesystem, or we were able to unmount it.  Returns error code otherwise.
3453 */
3454int
3455zfs_unmount_snap(const char *snapname)
3456{
3457	vfs_t *vfsp;
3458	zfsvfs_t *zfsvfs;
3459	int err;
3460
3461	if (strchr(snapname, '@') == NULL)
3462		return (0);
3463
3464	vfsp = zfs_get_vfs(snapname);
3465	if (vfsp == NULL)
3466		return (0);
3467
3468	zfsvfs = vfsp->vfs_data;
3469	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3470
3471	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3472	VFS_RELE(vfsp);
3473	if (err != 0)
3474		return (SET_ERROR(err));
3475
3476	/*
3477	 * Always force the unmount for snapshots.
3478	 */
3479
3480#ifdef illumos
3481	(void) dounmount(vfsp, MS_FORCE, kcred);
3482#else
3483	vfs_ref(vfsp);
3484	(void) dounmount(vfsp, MS_FORCE, curthread);
3485#endif
3486	return (0);
3487}
3488
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); 'arg' is ignored.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int rc = zfs_unmount_snap(snapname);

	return (rc);
}
3495
3496/*
3497 * When a clone is destroyed, its origin may also need to be destroyed,
3498 * in which case it must be unmounted.  This routine will do that unmount
3499 * if necessary.
3500 */
3501void
3502zfs_destroy_unmount_origin(const char *fsname)
3503{
3504	int error;
3505	objset_t *os;
3506	dsl_dataset_t *ds;
3507
3508	error = dmu_objset_hold(fsname, FTAG, &os);
3509	if (error != 0)
3510		return;
3511	ds = dmu_objset_ds(os);
3512	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3513		char originname[MAXNAMELEN];
3514		dsl_dataset_name(ds->ds_prev, originname);
3515		dmu_objset_rele(os, FTAG);
3516		(void) zfs_unmount_snap(originname);
3517	} else {
3518		dmu_objset_rele(os, FTAG);
3519	}
3520}
3521
3522/*
3523 * innvl: {
3524 *     "snaps" -> { snapshot1, snapshot2 }
3525 *     (optional boolean) "defer"
3526 * }
3527 *
3528 * outnvl: snapshot -> error code (int32)
3529 *
3530 */
3531/* ARGSUSED */
3532static int
3533zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3534{
3535	int error, poollen;
3536	nvlist_t *snaps;
3537	nvpair_t *pair;
3538	boolean_t defer;
3539
3540	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3541		return (SET_ERROR(EINVAL));
3542	defer = nvlist_exists(innvl, "defer");
3543
3544	poollen = strlen(poolname);
3545	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3546	    pair = nvlist_next_nvpair(snaps, pair)) {
3547		const char *name = nvpair_name(pair);
3548
3549		/*
3550		 * The snap must be in the specified pool to prevent the
3551		 * invalid removal of zvol minors below.
3552		 */
3553		if (strncmp(name, poolname, poollen) != 0 ||
3554		    (name[poollen] != '/' && name[poollen] != '@'))
3555			return (SET_ERROR(EXDEV));
3556
3557		error = zfs_unmount_snap(name);
3558		if (error != 0)
3559			return (error);
3560#if defined(__FreeBSD__)
3561		zvol_remove_minors(name);
3562#endif
3563	}
3564
3565	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3566}
3567
3568/*
3569 * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3570 * All bookmarks must be in the same pool.
3571 *
3572 * innvl: {
3573 *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3574 * }
3575 *
3576 * outnvl: bookmark -> error code (int32)
3577 *
3578 */
3579/* ARGSUSED */
3580static int
3581zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3582{
3583	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3584	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3585		char *snap_name;
3586
3587		/*
3588		 * Verify the snapshot argument.
3589		 */
3590		if (nvpair_value_string(pair, &snap_name) != 0)
3591			return (SET_ERROR(EINVAL));
3592
3593
3594		/* Verify that the keys (bookmarks) are unique */
3595		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3596		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3597			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3598				return (SET_ERROR(EINVAL));
3599		}
3600	}
3601
3602	return (dsl_bookmark_create(innvl, outnvl));
3603}
3604
3605/*
3606 * innvl: {
3607 *     property 1, property 2, ...
3608 * }
3609 *
3610 * outnvl: {
3611 *     bookmark name 1 -> { property 1, property 2, ... },
3612 *     bookmark name 2 -> { property 1, property 2, ... }
3613 * }
3614 *
3615 */
3616static int
3617zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3618{
3619	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3620}
3621
3622/*
3623 * innvl: {
3624 *     bookmark name 1, bookmark name 2
3625 * }
3626 *
3627 * outnvl: bookmark -> error code (int32)
3628 *
3629 */
3630static int
3631zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3632    nvlist_t *outnvl)
3633{
3634	int error, poollen;
3635
3636	poollen = strlen(poolname);
3637	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3638	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3639		const char *name = nvpair_name(pair);
3640		const char *cp = strchr(name, '#');
3641
3642		/*
3643		 * The bookmark name must contain an #, and the part after it
3644		 * must contain only valid characters.
3645		 */
3646		if (cp == NULL ||
3647		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3648			return (SET_ERROR(EINVAL));
3649
3650		/*
3651		 * The bookmark must be in the specified pool.
3652		 */
3653		if (strncmp(name, poolname, poollen) != 0 ||
3654		    (name[poollen] != '/' && name[poollen] != '#'))
3655			return (SET_ERROR(EXDEV));
3656	}
3657
3658	error = dsl_bookmark_destroy(innvl, outnvl);
3659	return (error);
3660}
3661
3662/*
3663 * inputs:
3664 * zc_name		name of dataset to destroy
3665 * zc_objset_type	type of objset
3666 * zc_defer_destroy	mark for deferred destroy
3667 *
3668 * outputs:		none
3669 */
3670static int
3671zfs_ioc_destroy(zfs_cmd_t *zc)
3672{
3673	int err;
3674
3675	if (zc->zc_objset_type == DMU_OST_ZFS) {
3676		err = zfs_unmount_snap(zc->zc_name);
3677		if (err != 0)
3678			return (err);
3679	}
3680
3681	if (strchr(zc->zc_name, '@'))
3682		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3683	else
3684		err = dsl_destroy_head(zc->zc_name);
3685	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3686#ifdef __FreeBSD__
3687		zvol_remove_minors(zc->zc_name);
3688#else
3689		(void) zvol_remove_minor(zc->zc_name);
3690#endif
3691	return (err);
3692}
3693
3694/*
3695 * fsname is name of dataset to rollback (to most recent snapshot)
3696 *
3697 * innvl is not used.
3698 *
3699 * outnvl: "target" -> name of most recent snapshot
3700 * }
3701 */
3702/* ARGSUSED */
3703static int
3704zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
3705{
3706	zfsvfs_t *zfsvfs;
3707	int error;
3708
3709	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3710		error = zfs_suspend_fs(zfsvfs);
3711		if (error == 0) {
3712			int resume_err;
3713
3714			error = dsl_dataset_rollback(fsname, zfsvfs, outnvl);
3715			resume_err = zfs_resume_fs(zfsvfs, fsname);
3716			error = error ? error : resume_err;
3717		}
3718		VFS_RELE(zfsvfs->z_vfs);
3719	} else {
3720		error = dsl_dataset_rollback(fsname, NULL, outnvl);
3721	}
3722	return (error);
3723}
3724
3725static int
3726recursive_unmount(const char *fsname, void *arg)
3727{
3728	const char *snapname = arg;
3729	char fullname[MAXNAMELEN];
3730
3731	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3732	return (zfs_unmount_snap(fullname));
3733}
3734
3735/*
3736 * inputs:
3737 * zc_name	old name of dataset
3738 * zc_value	new name of dataset
3739 * zc_cookie	recursive flag (only valid for snapshots)
3740 *
3741 * outputs:	none
3742 */
3743static int
3744zfs_ioc_rename(zfs_cmd_t *zc)
3745{
3746	boolean_t recursive = zc->zc_cookie & 1;
3747#ifdef __FreeBSD__
3748	boolean_t allow_mounted = zc->zc_cookie & 2;
3749#endif
3750	char *at;
3751
3752	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3753	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3754	    strchr(zc->zc_value, '%'))
3755		return (SET_ERROR(EINVAL));
3756
3757	at = strchr(zc->zc_name, '@');
3758	if (at != NULL) {
3759		/* snaps must be in same fs */
3760		int error;
3761
3762		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3763			return (SET_ERROR(EXDEV));
3764		*at = '\0';
3765#ifdef illumos
3766		if (zc->zc_objset_type == DMU_OST_ZFS) {
3767#else
3768		if (zc->zc_objset_type == DMU_OST_ZFS && allow_mounted) {
3769#endif
3770			error = dmu_objset_find(zc->zc_name,
3771			    recursive_unmount, at + 1,
3772			    recursive ? DS_FIND_CHILDREN : 0);
3773			if (error != 0) {
3774				*at = '@';
3775				return (error);
3776			}
3777		}
3778		error = dsl_dataset_rename_snapshot(zc->zc_name,
3779		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3780		*at = '@';
3781
3782		return (error);
3783	} else {
3784#ifdef illumos
3785		if (zc->zc_objset_type == DMU_OST_ZVOL)
3786			(void) zvol_remove_minor(zc->zc_name);
3787#endif
3788		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3789	}
3790}
3791
3792static int
3793zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3794{
3795	const char *propname = nvpair_name(pair);
3796	boolean_t issnap = (strchr(dsname, '@') != NULL);
3797	zfs_prop_t prop = zfs_name_to_prop(propname);
3798	uint64_t intval;
3799	int err;
3800
3801	if (prop == ZPROP_INVAL) {
3802		if (zfs_prop_user(propname)) {
3803			if (err = zfs_secpolicy_write_perms(dsname,
3804			    ZFS_DELEG_PERM_USERPROP, cr))
3805				return (err);
3806			return (0);
3807		}
3808
3809		if (!issnap && zfs_prop_userquota(propname)) {
3810			const char *perm = NULL;
3811			const char *uq_prefix =
3812			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3813			const char *gq_prefix =
3814			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3815
3816			if (strncmp(propname, uq_prefix,
3817			    strlen(uq_prefix)) == 0) {
3818				perm = ZFS_DELEG_PERM_USERQUOTA;
3819			} else if (strncmp(propname, gq_prefix,
3820			    strlen(gq_prefix)) == 0) {
3821				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3822			} else {
3823				/* USERUSED and GROUPUSED are read-only */
3824				return (SET_ERROR(EINVAL));
3825			}
3826
3827			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3828				return (err);
3829			return (0);
3830		}
3831
3832		return (SET_ERROR(EINVAL));
3833	}
3834
3835	if (issnap)
3836		return (SET_ERROR(EINVAL));
3837
3838	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3839		/*
3840		 * dsl_prop_get_all_impl() returns properties in this
3841		 * format.
3842		 */
3843		nvlist_t *attrs;
3844		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3845		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3846		    &pair) == 0);
3847	}
3848
3849	/*
3850	 * Check that this value is valid for this pool version
3851	 */
3852	switch (prop) {
3853	case ZFS_PROP_COMPRESSION:
3854		/*
3855		 * If the user specified gzip compression, make sure
3856		 * the SPA supports it. We ignore any errors here since
3857		 * we'll catch them later.
3858		 */
3859		if (nvpair_value_uint64(pair, &intval) == 0) {
3860			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3861			    intval <= ZIO_COMPRESS_GZIP_9 &&
3862			    zfs_earlier_version(dsname,
3863			    SPA_VERSION_GZIP_COMPRESSION)) {
3864				return (SET_ERROR(ENOTSUP));
3865			}
3866
3867			if (intval == ZIO_COMPRESS_ZLE &&
3868			    zfs_earlier_version(dsname,
3869			    SPA_VERSION_ZLE_COMPRESSION))
3870				return (SET_ERROR(ENOTSUP));
3871
3872			if (intval == ZIO_COMPRESS_LZ4) {
3873				spa_t *spa;
3874
3875				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3876					return (err);
3877
3878				if (!spa_feature_is_enabled(spa,
3879				    SPA_FEATURE_LZ4_COMPRESS)) {
3880					spa_close(spa, FTAG);
3881					return (SET_ERROR(ENOTSUP));
3882				}
3883				spa_close(spa, FTAG);
3884			}
3885
3886			/*
3887			 * If this is a bootable dataset then
3888			 * verify that the compression algorithm
3889			 * is supported for booting. We must return
3890			 * something other than ENOTSUP since it
3891			 * implies a downrev pool version.
3892			 */
3893			if (zfs_is_bootfs(dsname) &&
3894			    !BOOTFS_COMPRESS_VALID(intval)) {
3895				return (SET_ERROR(ERANGE));
3896			}
3897		}
3898		break;
3899
3900	case ZFS_PROP_COPIES:
3901		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3902			return (SET_ERROR(ENOTSUP));
3903		break;
3904
3905	case ZFS_PROP_DEDUP:
3906		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3907			return (SET_ERROR(ENOTSUP));
3908		break;
3909
3910	case ZFS_PROP_RECORDSIZE:
3911		/* Record sizes above 128k need the feature to be enabled */
3912		if (nvpair_value_uint64(pair, &intval) == 0 &&
3913		    intval > SPA_OLD_MAXBLOCKSIZE) {
3914			spa_t *spa;
3915
3916			/*
3917			 * If this is a bootable dataset then
3918			 * the we don't allow large (>128K) blocks,
3919			 * because GRUB doesn't support them.
3920			 */
3921			if (zfs_is_bootfs(dsname) &&
3922			    intval > SPA_OLD_MAXBLOCKSIZE) {
3923				return (SET_ERROR(ERANGE));
3924			}
3925
3926			/*
3927			 * We don't allow setting the property above 1MB,
3928			 * unless the tunable has been changed.
3929			 */
3930			if (intval > zfs_max_recordsize ||
3931			    intval > SPA_MAXBLOCKSIZE)
3932				return (SET_ERROR(ERANGE));
3933
3934			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3935				return (err);
3936
3937			if (!spa_feature_is_enabled(spa,
3938			    SPA_FEATURE_LARGE_BLOCKS)) {
3939				spa_close(spa, FTAG);
3940				return (SET_ERROR(ENOTSUP));
3941			}
3942			spa_close(spa, FTAG);
3943		}
3944		break;
3945
3946	case ZFS_PROP_SHARESMB:
3947		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3948			return (SET_ERROR(ENOTSUP));
3949		break;
3950
3951	case ZFS_PROP_ACLINHERIT:
3952		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3953		    nvpair_value_uint64(pair, &intval) == 0) {
3954			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3955			    zfs_earlier_version(dsname,
3956			    SPA_VERSION_PASSTHROUGH_X))
3957				return (SET_ERROR(ENOTSUP));
3958		}
3959		break;
3960	}
3961
3962	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3963}
3964
3965/*
3966 * Checks for a race condition to make sure we don't increment a feature flag
3967 * multiple times.
3968 */
3969static int
3970zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
3971{
3972	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
3973	spa_feature_t *featurep = arg;
3974
3975	if (!spa_feature_is_active(spa, *featurep))
3976		return (0);
3977	else
3978		return (SET_ERROR(EBUSY));
3979}
3980
3981/*
3982 * The callback invoked on feature activation in the sync task caused by
3983 * zfs_prop_activate_feature.
3984 */
3985static void
3986zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
3987{
3988	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
3989	spa_feature_t *featurep = arg;
3990
3991	spa_feature_incr(spa, *featurep, tx);
3992}
3993
3994/*
3995 * Activates a feature on a pool in response to a property setting. This
3996 * creates a new sync task which modifies the pool to reflect the feature
3997 * as being active.
3998 */
3999static int
4000zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4001{
4002	int err;
4003
4004	/* EBUSY here indicates that the feature is already active */
4005	err = dsl_sync_task(spa_name(spa),
4006	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4007	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4008
4009	if (err != 0 && err != EBUSY)
4010		return (err);
4011	else
4012		return (0);
4013}
4014
4015/*
4016 * Removes properties from the given props list that fail permission checks
4017 * needed to clear them and to restore them in case of a receive error. For each
4018 * property, make sure we have both set and inherit permissions.
4019 *
4020 * Returns the first error encountered if any permission checks fail. If the
4021 * caller provides a non-NULL errlist, it also gives the complete list of names
4022 * of all the properties that failed a permission check along with the
4023 * corresponding error numbers. The caller is responsible for freeing the
4024 * returned errlist.
4025 *
4026 * If every property checks out successfully, zero is returned and the list
4027 * pointed at by errlist is NULL.
4028 */
4029static int
4030zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4031{
4032	zfs_cmd_t *zc;
4033	nvpair_t *pair, *next_pair;
4034	nvlist_t *errors;
4035	int err, rv = 0;
4036
4037	if (props == NULL)
4038		return (0);
4039
4040	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4041
4042	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4043	(void) strcpy(zc->zc_name, dataset);
4044	pair = nvlist_next_nvpair(props, NULL);
4045	while (pair != NULL) {
4046		next_pair = nvlist_next_nvpair(props, pair);
4047
4048		(void) strcpy(zc->zc_value, nvpair_name(pair));
4049		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4050		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4051			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4052			VERIFY(nvlist_add_int32(errors,
4053			    zc->zc_value, err) == 0);
4054		}
4055		pair = next_pair;
4056	}
4057	kmem_free(zc, sizeof (zfs_cmd_t));
4058
4059	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4060		nvlist_free(errors);
4061		errors = NULL;
4062	} else {
4063		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4064	}
4065
4066	if (errlist == NULL)
4067		nvlist_free(errors);
4068	else
4069		*errlist = errors;
4070
4071	return (rv);
4072}
4073
4074static boolean_t
4075propval_equals(nvpair_t *p1, nvpair_t *p2)
4076{
4077	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4078		/* dsl_prop_get_all_impl() format */
4079		nvlist_t *attrs;
4080		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4081		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4082		    &p1) == 0);
4083	}
4084
4085	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4086		nvlist_t *attrs;
4087		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4088		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4089		    &p2) == 0);
4090	}
4091
4092	if (nvpair_type(p1) != nvpair_type(p2))
4093		return (B_FALSE);
4094
4095	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4096		char *valstr1, *valstr2;
4097
4098		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4099		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4100		return (strcmp(valstr1, valstr2) == 0);
4101	} else {
4102		uint64_t intval1, intval2;
4103
4104		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4105		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4106		return (intval1 == intval2);
4107	}
4108}
4109
4110/*
4111 * Remove properties from props if they are not going to change (as determined
4112 * by comparison with origprops). Remove them from origprops as well, since we
4113 * do not need to clear or restore properties that won't change.
4114 */
4115static void
4116props_reduce(nvlist_t *props, nvlist_t *origprops)
4117{
4118	nvpair_t *pair, *next_pair;
4119
4120	if (origprops == NULL)
4121		return; /* all props need to be received */
4122
4123	pair = nvlist_next_nvpair(props, NULL);
4124	while (pair != NULL) {
4125		const char *propname = nvpair_name(pair);
4126		nvpair_t *match;
4127
4128		next_pair = nvlist_next_nvpair(props, pair);
4129
4130		if ((nvlist_lookup_nvpair(origprops, propname,
4131		    &match) != 0) || !propval_equals(pair, match))
4132			goto next; /* need to set received value */
4133
4134		/* don't clear the existing received value */
4135		(void) nvlist_remove_nvpair(origprops, match);
4136		/* don't bother receiving the property */
4137		(void) nvlist_remove_nvpair(props, pair);
4138next:
4139		pair = next_pair;
4140	}
4141}
4142
#ifdef	DEBUG
/*
 * DEBUG-only one-shot error injection for zfs_ioc_recv(): when set, the
 * next receive resets it to B_FALSE and forces its own result to an error
 * after the stream has been processed.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4146
/*
 * Receive a send stream from a file descriptor into a new snapshot,
 * applying (and, on failure, rolling back) received properties.
 *
 * inputs:
 * zc_name		name of containing filesystem
 * zc_nvlist_src{_size}	nvlist of properties to apply
 * zc_value		name of snapshot to create
 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie		file descriptor to recv from
 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
 * zc_guid		force flag
 * zc_cleanup_fd	cleanup-on-exit file descriptor
 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
 * zc_resumable		if data is incomplete assume sender will resume
 *
 * outputs:
 * zc_cookie		number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj		zprop_errflags_t
 * zc_action_handle	handle for this guid/ds mapping
 *
 * Returns EINVAL for a malformed zc_value, EBADF if the descriptor cannot
 * be obtained, otherwise the receive error if there is one, and failing
 * that the property error (props_error).
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
	file_t *fp;
	dmu_recv_cookie_t drc;
	boolean_t force = (boolean_t)zc->zc_guid;
	int fd;
	int error = 0;
	int props_error = 0;
	nvlist_t *errors;
	offset_t off;
	nvlist_t *props = NULL; /* sent properties */
	nvlist_t *origprops = NULL; /* existing properties */
	char *origin = NULL;
	char *tosnap;
	char tofs[ZFS_MAXNAMELEN];
	cap_rights_t rights;
	boolean_t first_recvd_props = B_FALSE;

	/* zc_value must be a valid "fs@snap" name without any '%'. */
	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_value, '@') == NULL ||
	    strchr(zc->zc_value, '%'))
		return (SET_ERROR(EINVAL));

	/* Split the name: tofs is the part before '@', tosnap the rest. */
	(void) strcpy(tofs, zc->zc_value);
	tosnap = strchr(tofs, '@');
	*tosnap++ = '\0';

	if (zc->zc_nvlist_src != 0 &&
	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
		return (error);

	fd = zc->zc_cookie;
#ifdef illumos
	fp = getf(fd);
#else
	/*
	 * NOTE(review): the return value of fget_read() is not checked and
	 * fp has no initializer; the NULL test below presumably relies on
	 * fget_read() storing NULL on failure -- confirm.
	 */
	fget_read(curthread, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
#endif
	if (fp == NULL) {
		nvlist_free(props);
		return (SET_ERROR(EBADF));
	}

	/* Collects the per-property errors copied out via zc_nvlist_dst. */
	errors = fnvlist_alloc();

	if (zc->zc_string[0])
		origin = zc->zc_string;

	error = dmu_recv_begin(tofs, tosnap,
	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
	if (error != 0)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props != NULL && !drc.drc_newfs) {
		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
		    SPA_VERSION_RECVD_PROPS &&
		    !dsl_prop_get_hasrecvd(tofs))
			first_recvd_props = B_TRUE;

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(tofs, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
				(void) nvlist_merge(errors, errlist, 0);
			nvlist_free(errlist);

			if (clear_received_props(tofs, origprops,
			    first_recvd_props ? NULL : props) != 0)
				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		} else {
			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		}
	}

	if (props != NULL) {
		props_error = dsl_prop_set_hasrecvd(tofs);

		if (props_error == 0) {
			/* Failures are recorded per property in 'errors'. */
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    props, errors);
		}
	}

	if (zc->zc_nvlist_dst_size != 0 &&
	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
	    put_nvlist(zc, errors) != 0)) {
		/*
		 * Caller made zc->zc_nvlist_dst less than the minimum expected
		 * size or supplied an invalid address.
		 */
		props_error = SET_ERROR(EINVAL);
	}

	/* Work on a copy of the file offset; it is written back below. */
	off = fp->f_offset;
	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
	    &zc->zc_action_handle);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			int end_err;

			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc, zfsvfs);
			if (error == 0)
				error = zfs_resume_fs(zfsvfs, tofs);
			/* A suspend/resume error wins over end_err. */
			error = error ? error : end_err;
			VFS_RELE(zfsvfs->z_vfs);
		} else {
			error = dmu_recv_end(&drc, NULL);
		}
	}

	/* Report how far the offset advanced, then commit it if in range. */
	zc->zc_cookie = off - fp->f_offset;
	if (off >= 0 && off <= MAXOFFSET_T)
		fp->f_offset = off;

#ifdef	DEBUG
	/* One-shot error injection; the switch resets itself. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif

#ifdef __FreeBSD__
	if (error == 0)
		zvol_create_minors(tofs);
#endif

	/*
	 * On error, restore the original props.
	 */
	if (error != 0 && props != NULL && !drc.drc_newfs) {
		if (clear_received_props(tofs, props, NULL) != 0) {
			/*
			 * We failed to clear the received properties.
			 * Since we may have left a $recvd value on the
			 * system, we can't clear the $hasrecvd flag.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		} else if (first_recvd_props) {
			dsl_prop_unset_hasrecvd(tofs);
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	/* Common cleanup, also reached when dmu_recv_begin() fails. */
	nvlist_free(props);
	nvlist_free(origprops);
	nvlist_free(errors);
	releasef(fd);

	/* A receive error takes precedence over a property error. */
	if (error == 0)
		error = props_error;

	return (error);
}
4369
/*
 * Generate a send stream for a snapshot, or only estimate its size.
 *
 * inputs:
 * zc_name	name of snapshot to send
 * zc_cookie	file descriptor to send stream to
 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
 * zc_sendobj	objsetid of snapshot to send
 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
 *		output size in zc_objset_type.
 * zc_flags	lzc_send_flags
 *
 * outputs:
 * zc_objset_type	estimated size, if zc_guid is set
 *
 * Returns EBADF if the output descriptor cannot be obtained, otherwise the
 * first error from the pool/dataset holds or from dmu_send_estimate() /
 * dmu_send_obj().
 */
static int
zfs_ioc_send(zfs_cmd_t *zc)
{
	int error;
	offset_t off;
	boolean_t estimate = (zc->zc_guid != 0);
	/* Bits 0x1 and 0x2 of zc_flags select embedded data / large blocks. */
	boolean_t embedok = (zc->zc_flags & 0x1);
	boolean_t large_block_ok = (zc->zc_flags & 0x2);

	if (zc->zc_obj != 0) {
		/*
		 * "fromorigin": if the snapshot to send belongs to a clone,
		 * use the clone's origin as the incremental source.
		 */
		dsl_pool_t *dp;
		dsl_dataset_t *tosnap;

		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
		if (error != 0)
			return (error);

		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
		if (error != 0) {
			dsl_pool_rele(dp, FTAG);
			return (error);
		}

		if (dsl_dir_is_clone(tosnap->ds_dir))
			zc->zc_fromobj =
			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
		dsl_dataset_rele(tosnap, FTAG);
		dsl_pool_rele(dp, FTAG);
	}

	if (estimate) {
		/* Size estimate only: no file descriptor is touched. */
		dsl_pool_t *dp;
		dsl_dataset_t *tosnap;
		dsl_dataset_t *fromsnap = NULL;

		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
		if (error != 0)
			return (error);

		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
		if (error != 0) {
			dsl_pool_rele(dp, FTAG);
			return (error);
		}

		if (zc->zc_fromobj != 0) {
			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
			    FTAG, &fromsnap);
			if (error != 0) {
				dsl_dataset_rele(tosnap, FTAG);
				dsl_pool_rele(dp, FTAG);
				return (error);
			}
		}

		error = dmu_send_estimate(tosnap, fromsnap,
		    &zc->zc_objset_type);

		/* Drop the holds in reverse order of acquisition. */
		if (fromsnap != NULL)
			dsl_dataset_rele(fromsnap, FTAG);
		dsl_dataset_rele(tosnap, FTAG);
		dsl_pool_rele(dp, FTAG);
	} else {
		file_t *fp;
		cap_rights_t rights;

#ifdef illumos
		fp = getf(zc->zc_cookie);
#else
		/*
		 * NOTE(review): the return value of fget_write() is not
		 * checked and fp has no initializer; the NULL test below
		 * presumably relies on fget_write() storing NULL on
		 * failure -- confirm.
		 */
		fget_write(curthread, zc->zc_cookie,
		    cap_rights_init(&rights, CAP_WRITE), &fp);
#endif
		if (fp == NULL)
			return (SET_ERROR(EBADF));

		/* Work on a copy of the offset; written back if in range. */
		off = fp->f_offset;
		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
		    zc->zc_fromobj, embedok, large_block_ok,
#ifdef illumos
		    zc->zc_cookie, fp->f_vnode, &off);
#else
		    zc->zc_cookie, fp, &off);
#endif

		if (off >= 0 && off <= MAXOFFSET_T)
			fp->f_offset = off;
		releasef(zc->zc_cookie);
	}
	return (error);
}
4474
4475/*
4476 * inputs:
4477 * zc_name	name of snapshot on which to report progress
4478 * zc_cookie	file descriptor of send stream
4479 *
4480 * outputs:
4481 * zc_cookie	number of bytes written in send stream thus far
4482 */
4483static int
4484zfs_ioc_send_progress(zfs_cmd_t *zc)
4485{
4486	dsl_pool_t *dp;
4487	dsl_dataset_t *ds;
4488	dmu_sendarg_t *dsp = NULL;
4489	int error;
4490
4491	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4492	if (error != 0)
4493		return (error);
4494
4495	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4496	if (error != 0) {
4497		dsl_pool_rele(dp, FTAG);
4498		return (error);
4499	}
4500
4501	mutex_enter(&ds->ds_sendstream_lock);
4502
4503	/*
4504	 * Iterate over all the send streams currently active on this dataset.
4505	 * If there's one which matches the specified file descriptor _and_ the
4506	 * stream was started by the current process, return the progress of
4507	 * that stream.
4508	 */
4509	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4510	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4511		if (dsp->dsa_outfd == zc->zc_cookie &&
4512		    dsp->dsa_proc == curproc)
4513			break;
4514	}
4515
4516	if (dsp != NULL)
4517		zc->zc_cookie = *(dsp->dsa_off);
4518	else
4519		error = SET_ERROR(ENOENT);
4520
4521	mutex_exit(&ds->ds_sendstream_lock);
4522	dsl_dataset_rele(ds, FTAG);
4523	dsl_pool_rele(dp, FTAG);
4524	return (error);
4525}
4526
4527static int
4528zfs_ioc_inject_fault(zfs_cmd_t *zc)
4529{
4530	int id, error;
4531
4532	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4533	    &zc->zc_inject_record);
4534
4535	if (error == 0)
4536		zc->zc_guid = (uint64_t)id;
4537
4538	return (error);
4539}
4540
4541static int
4542zfs_ioc_clear_fault(zfs_cmd_t *zc)
4543{
4544	return (zio_clear_fault((int)zc->zc_guid));
4545}
4546
4547static int
4548zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4549{
4550	int id = (int)zc->zc_guid;
4551	int error;
4552
4553	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4554	    &zc->zc_inject_record);
4555
4556	zc->zc_guid = id;
4557
4558	return (error);
4559}
4560
4561static int
4562zfs_ioc_error_log(zfs_cmd_t *zc)
4563{
4564	spa_t *spa;
4565	int error;
4566	size_t count = (size_t)zc->zc_nvlist_dst_size;
4567
4568	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4569		return (error);
4570
4571	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4572	    &count);
4573	if (error == 0)
4574		zc->zc_nvlist_dst_size = count;
4575	else
4576		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4577
4578	spa_close(spa, FTAG);
4579
4580	return (error);
4581}
4582
4583static int
4584zfs_ioc_clear(zfs_cmd_t *zc)
4585{
4586	spa_t *spa;
4587	vdev_t *vd;
4588	int error;
4589
4590	/*
4591	 * On zpool clear we also fix up missing slogs
4592	 */
4593	mutex_enter(&spa_namespace_lock);
4594	spa = spa_lookup(zc->zc_name);
4595	if (spa == NULL) {
4596		mutex_exit(&spa_namespace_lock);
4597		return (SET_ERROR(EIO));
4598	}
4599	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4600		/* we need to let spa_open/spa_load clear the chains */
4601		spa_set_log_state(spa, SPA_LOG_CLEAR);
4602	}
4603	spa->spa_last_open_failed = 0;
4604	mutex_exit(&spa_namespace_lock);
4605
4606	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4607		error = spa_open(zc->zc_name, &spa, FTAG);
4608	} else {
4609		nvlist_t *policy;
4610		nvlist_t *config = NULL;
4611
4612		if (zc->zc_nvlist_src == 0)
4613			return (SET_ERROR(EINVAL));
4614
4615		if ((error = get_nvlist(zc->zc_nvlist_src,
4616		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4617			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4618			    policy, &config);
4619			if (config != NULL) {
4620				int err;
4621
4622				if ((err = put_nvlist(zc, config)) != 0)
4623					error = err;
4624				nvlist_free(config);
4625			}
4626			nvlist_free(policy);
4627		}
4628	}
4629
4630	if (error != 0)
4631		return (error);
4632
4633	spa_vdev_state_enter(spa, SCL_NONE);
4634
4635	if (zc->zc_guid == 0) {
4636		vd = NULL;
4637	} else {
4638		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4639		if (vd == NULL) {
4640			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4641			spa_close(spa, FTAG);
4642			return (SET_ERROR(ENODEV));
4643		}
4644	}
4645
4646	vdev_clear(spa, vd);
4647
4648	(void) spa_vdev_state_exit(spa, NULL, 0);
4649
4650	/*
4651	 * Resume any suspended I/Os.
4652	 */
4653	if (zio_resume(spa) != 0)
4654		error = SET_ERROR(EIO);
4655
4656	spa_close(spa, FTAG);
4657
4658	return (error);
4659}
4660
4661static int
4662zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4663{
4664	spa_t *spa;
4665	int error;
4666
4667	error = spa_open(zc->zc_name, &spa, FTAG);
4668	if (error != 0)
4669		return (error);
4670
4671	spa_vdev_state_enter(spa, SCL_NONE);
4672
4673	/*
4674	 * If a resilver is already in progress then set the
4675	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4676	 * the scan as a side effect of the reopen. Otherwise, let
4677	 * vdev_open() decided if a resilver is required.
4678	 */
4679	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4680	vdev_reopen(spa->spa_root_vdev);
4681	spa->spa_scrub_reopen = B_FALSE;
4682
4683	(void) spa_vdev_state_exit(spa, NULL, 0);
4684	spa_close(spa, FTAG);
4685	return (0);
4686}
4687/*
4688 * inputs:
4689 * zc_name	name of filesystem
4690 * zc_value	name of origin snapshot
4691 *
4692 * outputs:
4693 * zc_string	name of conflicting snapshot, if there is one
4694 */
4695static int
4696zfs_ioc_promote(zfs_cmd_t *zc)
4697{
4698	char *cp;
4699
4700	/*
4701	 * We don't need to unmount *all* the origin fs's snapshots, but
4702	 * it's easier.
4703	 */
4704	cp = strchr(zc->zc_value, '@');
4705	if (cp)
4706		*cp = '\0';
4707	(void) dmu_objset_find(zc->zc_value,
4708	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4709	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4710}
4711
4712/*
4713 * Retrieve a single {user|group}{used|quota}@... property.
4714 *
4715 * inputs:
4716 * zc_name	name of filesystem
4717 * zc_objset_type zfs_userquota_prop_t
4718 * zc_value	domain name (eg. "S-1-234-567-89")
4719 * zc_guid	RID/UID/GID
4720 *
4721 * outputs:
4722 * zc_cookie	property value
4723 */
4724static int
4725zfs_ioc_userspace_one(zfs_cmd_t *zc)
4726{
4727	zfsvfs_t *zfsvfs;
4728	int error;
4729
4730	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4731		return (SET_ERROR(EINVAL));
4732
4733	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4734	if (error != 0)
4735		return (error);
4736
4737	error = zfs_userspace_one(zfsvfs,
4738	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4739	zfsvfs_rele(zfsvfs, FTAG);
4740
4741	return (error);
4742}
4743
4744/*
4745 * inputs:
4746 * zc_name		name of filesystem
4747 * zc_cookie		zap cursor
4748 * zc_objset_type	zfs_userquota_prop_t
4749 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4750 *
4751 * outputs:
4752 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
4753 * zc_cookie	zap cursor
4754 */
4755static int
4756zfs_ioc_userspace_many(zfs_cmd_t *zc)
4757{
4758	zfsvfs_t *zfsvfs;
4759	int bufsize = zc->zc_nvlist_dst_size;
4760
4761	if (bufsize <= 0)
4762		return (SET_ERROR(ENOMEM));
4763
4764	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4765	if (error != 0)
4766		return (error);
4767
4768	void *buf = kmem_alloc(bufsize, KM_SLEEP);
4769
4770	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4771	    buf, &zc->zc_nvlist_dst_size);
4772
4773	if (error == 0) {
4774		error = ddi_copyout(buf,
4775		    (void *)(uintptr_t)zc->zc_nvlist_dst,
4776		    zc->zc_nvlist_dst_size, zc->zc_iflags);
4777	}
4778	kmem_free(buf, bufsize);
4779	zfsvfs_rele(zfsvfs, FTAG);
4780
4781	return (error);
4782}
4783
4784/*
4785 * inputs:
4786 * zc_name		name of filesystem
4787 *
4788 * outputs:
4789 * none
4790 */
4791static int
4792zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4793{
4794	objset_t *os;
4795	int error = 0;
4796	zfsvfs_t *zfsvfs;
4797
4798	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4799		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4800			/*
4801			 * If userused is not enabled, it may be because the
4802			 * objset needs to be closed & reopened (to grow the
4803			 * objset_phys_t).  Suspend/resume the fs will do that.
4804			 */
4805			error = zfs_suspend_fs(zfsvfs);
4806			if (error == 0) {
4807				dmu_objset_refresh_ownership(zfsvfs->z_os,
4808				    zfsvfs);
4809				error = zfs_resume_fs(zfsvfs, zc->zc_name);
4810			}
4811		}
4812		if (error == 0)
4813			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4814		VFS_RELE(zfsvfs->z_vfs);
4815	} else {
4816		/* XXX kind of reading contents without owning */
4817		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4818		if (error != 0)
4819			return (error);
4820
4821		error = dmu_objset_userspace_upgrade(os);
4822		dmu_objset_rele(os, FTAG);
4823	}
4824
4825	return (error);
4826}
4827
4828#ifdef sun
4829/*
4830 * We don't want to have a hard dependency
4831 * against some special symbols in sharefs
4832 * nfs, and smbsrv.  Determine them if needed when
4833 * the first file system is shared.
4834 * Neither sharefs, nfs or smbsrv are unloadable modules.
4835 */
4836int (*znfsexport_fs)(void *arg);
4837int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
4838int (*zsmbexport_fs)(void *arg, boolean_t add_share);
4839
4840int zfs_nfsshare_inited;
4841int zfs_smbshare_inited;
4842
4843ddi_modhandle_t nfs_mod;
4844ddi_modhandle_t sharefs_mod;
4845ddi_modhandle_t smbsrv_mod;
4846#endif	/* sun */
4847kmutex_t zfs_share_lock;
4848
4849#ifdef sun
4850static int
4851zfs_init_sharefs()
4852{
4853	int error;
4854
4855	ASSERT(MUTEX_HELD(&zfs_share_lock));
4856	/* Both NFS and SMB shares also require sharetab support. */
4857	if (sharefs_mod == NULL && ((sharefs_mod =
4858	    ddi_modopen("fs/sharefs",
4859	    KRTLD_MODE_FIRST, &error)) == NULL)) {
4860		return (SET_ERROR(ENOSYS));
4861	}
4862	if (zshare_fs == NULL && ((zshare_fs =
4863	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4864	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4865		return (SET_ERROR(ENOSYS));
4866	}
4867	return (0);
4868}
4869#endif	/* sun */
4870
4871static int
4872zfs_ioc_share(zfs_cmd_t *zc)
4873{
4874#ifdef sun
4875	int error;
4876	int opcode;
4877
4878	switch (zc->zc_share.z_sharetype) {
4879	case ZFS_SHARE_NFS:
4880	case ZFS_UNSHARE_NFS:
4881		if (zfs_nfsshare_inited == 0) {
4882			mutex_enter(&zfs_share_lock);
4883			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4884			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4885				mutex_exit(&zfs_share_lock);
4886				return (SET_ERROR(ENOSYS));
4887			}
4888			if (znfsexport_fs == NULL &&
4889			    ((znfsexport_fs = (int (*)(void *))
4890			    ddi_modsym(nfs_mod,
4891			    "nfs_export", &error)) == NULL)) {
4892				mutex_exit(&zfs_share_lock);
4893				return (SET_ERROR(ENOSYS));
4894			}
4895			error = zfs_init_sharefs();
4896			if (error != 0) {
4897				mutex_exit(&zfs_share_lock);
4898				return (SET_ERROR(ENOSYS));
4899			}
4900			zfs_nfsshare_inited = 1;
4901			mutex_exit(&zfs_share_lock);
4902		}
4903		break;
4904	case ZFS_SHARE_SMB:
4905	case ZFS_UNSHARE_SMB:
4906		if (zfs_smbshare_inited == 0) {
4907			mutex_enter(&zfs_share_lock);
4908			if (smbsrv_mod == NULL && ((smbsrv_mod =
4909			    ddi_modopen("drv/smbsrv",
4910			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4911				mutex_exit(&zfs_share_lock);
4912				return (SET_ERROR(ENOSYS));
4913			}
4914			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4915			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4916			    "smb_server_share", &error)) == NULL)) {
4917				mutex_exit(&zfs_share_lock);
4918				return (SET_ERROR(ENOSYS));
4919			}
4920			error = zfs_init_sharefs();
4921			if (error != 0) {
4922				mutex_exit(&zfs_share_lock);
4923				return (SET_ERROR(ENOSYS));
4924			}
4925			zfs_smbshare_inited = 1;
4926			mutex_exit(&zfs_share_lock);
4927		}
4928		break;
4929	default:
4930		return (SET_ERROR(EINVAL));
4931	}
4932
4933	switch (zc->zc_share.z_sharetype) {
4934	case ZFS_SHARE_NFS:
4935	case ZFS_UNSHARE_NFS:
4936		if (error =
4937		    znfsexport_fs((void *)
4938		    (uintptr_t)zc->zc_share.z_exportdata))
4939			return (error);
4940		break;
4941	case ZFS_SHARE_SMB:
4942	case ZFS_UNSHARE_SMB:
4943		if (error = zsmbexport_fs((void *)
4944		    (uintptr_t)zc->zc_share.z_exportdata,
4945		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4946		    B_TRUE: B_FALSE)) {
4947			return (error);
4948		}
4949		break;
4950	}
4951
4952	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4953	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4954	    SHAREFS_ADD : SHAREFS_REMOVE;
4955
4956	/*
4957	 * Add or remove share from sharetab
4958	 */
4959	error = zshare_fs(opcode,
4960	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4961	    zc->zc_share.z_sharemax);
4962
4963	return (error);
4964
4965#else	/* !sun */
4966	return (ENOSYS);
4967#endif	/* !sun */
4968}
4969
4970ace_t full_access[] = {
4971	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
4972};
4973
4974/*
4975 * inputs:
4976 * zc_name		name of containing filesystem
4977 * zc_obj		object # beyond which we want next in-use object #
4978 *
4979 * outputs:
4980 * zc_obj		next in-use object #
4981 */
4982static int
4983zfs_ioc_next_obj(zfs_cmd_t *zc)
4984{
4985	objset_t *os = NULL;
4986	int error;
4987
4988	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4989	if (error != 0)
4990		return (error);
4991
4992	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4993	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
4994
4995	dmu_objset_rele(os, FTAG);
4996	return (error);
4997}
4998
4999/*
5000 * inputs:
5001 * zc_name		name of filesystem
5002 * zc_value		prefix name for snapshot
5003 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5004 *
5005 * outputs:
5006 * zc_value		short name of new snapshot
5007 */
5008static int
5009zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5010{
5011	char *snap_name;
5012	char *hold_name;
5013	int error;
5014	minor_t minor;
5015
5016	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5017	if (error != 0)
5018		return (error);
5019
5020	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5021	    (u_longlong_t)ddi_get_lbolt64());
5022	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5023
5024	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5025	    hold_name);
5026	if (error == 0)
5027		(void) strcpy(zc->zc_value, snap_name);
5028	strfree(snap_name);
5029	strfree(hold_name);
5030	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5031	return (error);
5032}
5033
5034/*
5035 * inputs:
5036 * zc_name		name of "to" snapshot
5037 * zc_value		name of "from" snapshot
5038 * zc_cookie		file descriptor to write diff data on
5039 *
5040 * outputs:
5041 * dmu_diff_record_t's to the file descriptor
5042 */
5043static int
5044zfs_ioc_diff(zfs_cmd_t *zc)
5045{
5046	file_t *fp;
5047	cap_rights_t rights;
5048	offset_t off;
5049	int error;
5050
5051#ifdef illumos
5052	fp = getf(zc->zc_cookie);
5053#else
5054	fget_write(curthread, zc->zc_cookie,
5055		    cap_rights_init(&rights, CAP_WRITE), &fp);
5056#endif
5057	if (fp == NULL)
5058		return (SET_ERROR(EBADF));
5059
5060	off = fp->f_offset;
5061
5062#ifdef illumos
5063	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5064#else
5065	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
5066#endif
5067
5068	if (off >= 0 && off <= MAXOFFSET_T)
5069		fp->f_offset = off;
5070	releasef(zc->zc_cookie);
5071
5072	return (error);
5073}
5074
#ifdef sun
/*
 * Remove every entry found in the shares directory dzp.
 *
 * NOTE(review): the loop is only left once zap_cursor_retrieve() or
 * VOP_REMOVE() has returned nonzero, so this function never returns 0.
 * Presumably callers expect the end-of-directory error - confirm.
 */
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t cursor;
	zap_attribute_t entry;
	int error;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&cursor, &entry)) == 0) {
		error = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred, NULL, 0);
		if (error != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (error);
}
#endif	/* sun */
5098
5099static int
5100zfs_ioc_smb_acl(zfs_cmd_t *zc)
5101{
5102#ifdef sun
5103	vnode_t *vp;
5104	znode_t *dzp;
5105	vnode_t *resourcevp = NULL;
5106	znode_t *sharedir;
5107	zfsvfs_t *zfsvfs;
5108	nvlist_t *nvlist;
5109	char *src, *target;
5110	vattr_t vattr;
5111	vsecattr_t vsec;
5112	int error = 0;
5113
5114	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5115	    NO_FOLLOW, NULL, &vp)) != 0)
5116		return (error);
5117
5118	/* Now make sure mntpnt and dataset are ZFS */
5119
5120	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
5121	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5122	    zc->zc_name) != 0)) {
5123		VN_RELE(vp);
5124		return (SET_ERROR(EINVAL));
5125	}
5126
5127	dzp = VTOZ(vp);
5128	zfsvfs = dzp->z_zfsvfs;
5129	ZFS_ENTER(zfsvfs);
5130
5131	/*
5132	 * Create share dir if its missing.
5133	 */
5134	mutex_enter(&zfsvfs->z_lock);
5135	if (zfsvfs->z_shares_dir == 0) {
5136		dmu_tx_t *tx;
5137
5138		tx = dmu_tx_create(zfsvfs->z_os);
5139		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5140		    ZFS_SHARES_DIR);
5141		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5142		error = dmu_tx_assign(tx, TXG_WAIT);
5143		if (error != 0) {
5144			dmu_tx_abort(tx);
5145		} else {
5146			error = zfs_create_share_dir(zfsvfs, tx);
5147			dmu_tx_commit(tx);
5148		}
5149		if (error != 0) {
5150			mutex_exit(&zfsvfs->z_lock);
5151			VN_RELE(vp);
5152			ZFS_EXIT(zfsvfs);
5153			return (error);
5154		}
5155	}
5156	mutex_exit(&zfsvfs->z_lock);
5157
5158	ASSERT(zfsvfs->z_shares_dir);
5159	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5160		VN_RELE(vp);
5161		ZFS_EXIT(zfsvfs);
5162		return (error);
5163	}
5164
5165	switch (zc->zc_cookie) {
5166	case ZFS_SMB_ACL_ADD:
5167		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5168		vattr.va_type = VREG;
5169		vattr.va_mode = S_IFREG|0777;
5170		vattr.va_uid = 0;
5171		vattr.va_gid = 0;
5172
5173		vsec.vsa_mask = VSA_ACE;
5174		vsec.vsa_aclentp = &full_access;
5175		vsec.vsa_aclentsz = sizeof (full_access);
5176		vsec.vsa_aclcnt = 1;
5177
5178		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5179		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5180		if (resourcevp)
5181			VN_RELE(resourcevp);
5182		break;
5183
5184	case ZFS_SMB_ACL_REMOVE:
5185		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5186		    NULL, 0);
5187		break;
5188
5189	case ZFS_SMB_ACL_RENAME:
5190		if ((error = get_nvlist(zc->zc_nvlist_src,
5191		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5192			VN_RELE(vp);
5193			VN_RELE(ZTOV(sharedir));
5194			ZFS_EXIT(zfsvfs);
5195			return (error);
5196		}
5197		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5198		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5199		    &target)) {
5200			VN_RELE(vp);
5201			VN_RELE(ZTOV(sharedir));
5202			ZFS_EXIT(zfsvfs);
5203			nvlist_free(nvlist);
5204			return (error);
5205		}
5206		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5207		    kcred, NULL, 0);
5208		nvlist_free(nvlist);
5209		break;
5210
5211	case ZFS_SMB_ACL_PURGE:
5212		error = zfs_smb_acl_purge(sharedir);
5213		break;
5214
5215	default:
5216		error = SET_ERROR(EINVAL);
5217		break;
5218	}
5219
5220	VN_RELE(vp);
5221	VN_RELE(ZTOV(sharedir));
5222
5223	ZFS_EXIT(zfsvfs);
5224
5225	return (error);
5226#else	/* !sun */
5227	return (EOPNOTSUPP);
5228#endif	/* !sun */
5229}
5230
5231/*
5232 * innvl: {
5233 *     "holds" -> { snapname -> holdname (string), ... }
5234 *     (optional) "cleanup_fd" -> fd (int32)
5235 * }
5236 *
5237 * outnvl: {
5238 *     snapname -> error value (int32)
5239 *     ...
5240 * }
5241 */
5242/* ARGSUSED */
5243static int
5244zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5245{
5246	nvpair_t *pair;
5247	nvlist_t *holds;
5248	int cleanup_fd = -1;
5249	int error;
5250	minor_t minor = 0;
5251
5252	error = nvlist_lookup_nvlist(args, "holds", &holds);
5253	if (error != 0)
5254		return (SET_ERROR(EINVAL));
5255
5256	/* make sure the user didn't pass us any invalid (empty) tags */
5257	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5258	    pair = nvlist_next_nvpair(holds, pair)) {
5259		char *htag;
5260
5261		error = nvpair_value_string(pair, &htag);
5262		if (error != 0)
5263			return (SET_ERROR(error));
5264
5265		if (strlen(htag) == 0)
5266			return (SET_ERROR(EINVAL));
5267	}
5268
5269	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5270		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5271		if (error != 0)
5272			return (error);
5273	}
5274
5275	error = dsl_dataset_user_hold(holds, minor, errlist);
5276	if (minor != 0)
5277		zfs_onexit_fd_rele(cleanup_fd);
5278	return (error);
5279}
5280
5281/*
5282 * innvl is not used.
5283 *
5284 * outnvl: {
5285 *    holdname -> time added (uint64 seconds since epoch)
5286 *    ...
5287 * }
5288 */
5289/* ARGSUSED */
5290static int
5291zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5292{
5293	return (dsl_dataset_get_holds(snapname, outnvl));
5294}
5295
5296/*
5297 * innvl: {
5298 *     snapname -> { holdname, ... }
5299 *     ...
5300 * }
5301 *
5302 * outnvl: {
5303 *     snapname -> error value (int32)
5304 *     ...
5305 * }
5306 */
5307/* ARGSUSED */
5308static int
5309zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5310{
5311	return (dsl_dataset_user_release(holds, errlist));
5312}
5313
5314/*
5315 * inputs:
5316 * zc_name		name of new filesystem or snapshot
5317 * zc_value		full name of old snapshot
5318 *
5319 * outputs:
5320 * zc_cookie		space in bytes
5321 * zc_objset_type	compressed space in bytes
5322 * zc_perm_action	uncompressed space in bytes
5323 */
5324static int
5325zfs_ioc_space_written(zfs_cmd_t *zc)
5326{
5327	int error;
5328	dsl_pool_t *dp;
5329	dsl_dataset_t *new, *old;
5330
5331	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5332	if (error != 0)
5333		return (error);
5334	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5335	if (error != 0) {
5336		dsl_pool_rele(dp, FTAG);
5337		return (error);
5338	}
5339	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5340	if (error != 0) {
5341		dsl_dataset_rele(new, FTAG);
5342		dsl_pool_rele(dp, FTAG);
5343		return (error);
5344	}
5345
5346	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5347	    &zc->zc_objset_type, &zc->zc_perm_action);
5348	dsl_dataset_rele(old, FTAG);
5349	dsl_dataset_rele(new, FTAG);
5350	dsl_pool_rele(dp, FTAG);
5351	return (error);
5352}
5353
5354/*
5355 * innvl: {
5356 *     "firstsnap" -> snapshot name
5357 * }
5358 *
5359 * outnvl: {
5360 *     "used" -> space in bytes
5361 *     "compressed" -> compressed space in bytes
5362 *     "uncompressed" -> uncompressed space in bytes
5363 * }
5364 */
5365static int
5366zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5367{
5368	int error;
5369	dsl_pool_t *dp;
5370	dsl_dataset_t *new, *old;
5371	char *firstsnap;
5372	uint64_t used, comp, uncomp;
5373
5374	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5375		return (SET_ERROR(EINVAL));
5376
5377	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5378	if (error != 0)
5379		return (error);
5380
5381	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5382	if (error == 0 && !new->ds_is_snapshot) {
5383		dsl_dataset_rele(new, FTAG);
5384		error = SET_ERROR(EINVAL);
5385	}
5386	if (error != 0) {
5387		dsl_pool_rele(dp, FTAG);
5388		return (error);
5389	}
5390	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5391	if (error == 0 && !old->ds_is_snapshot) {
5392		dsl_dataset_rele(old, FTAG);
5393		error = SET_ERROR(EINVAL);
5394	}
5395	if (error != 0) {
5396		dsl_dataset_rele(new, FTAG);
5397		dsl_pool_rele(dp, FTAG);
5398		return (error);
5399	}
5400
5401	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5402	dsl_dataset_rele(old, FTAG);
5403	dsl_dataset_rele(new, FTAG);
5404	dsl_pool_rele(dp, FTAG);
5405	fnvlist_add_uint64(outnvl, "used", used);
5406	fnvlist_add_uint64(outnvl, "compressed", comp);
5407	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5408	return (error);
5409}
5410
5411static int
5412zfs_ioc_jail(zfs_cmd_t *zc)
5413{
5414
5415	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
5416	    (int)zc->zc_jailid));
5417}
5418
5419static int
5420zfs_ioc_unjail(zfs_cmd_t *zc)
5421{
5422
5423	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
5424	    (int)zc->zc_jailid));
5425}
5426
5427/*
5428 * innvl: {
5429 *     "fd" -> file descriptor to write stream to (int32)
5430 *     (optional) "fromsnap" -> full snap name to send an incremental from
5431 *     (optional) "largeblockok" -> (value ignored)
5432 *         indicates that blocks > 128KB are permitted
5433 *     (optional) "embedok" -> (value ignored)
5434 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5435 *     (optional) "resume_object" and "resume_offset" -> (uint64)
5436 *         if present, resume send stream from specified object and offset.
5437 * }
5438 *
5439 * outnvl is unused
5440 */
5441/* ARGSUSED */
5442static int
5443zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5444{
5445	cap_rights_t rights;
5446	file_t *fp;
5447	int error;
5448	offset_t off;
5449	char *fromname = NULL;
5450	int fd;
5451	boolean_t largeblockok;
5452	boolean_t embedok;
5453	uint64_t resumeobj = 0;
5454	uint64_t resumeoff = 0;
5455
5456	error = nvlist_lookup_int32(innvl, "fd", &fd);
5457	if (error != 0)
5458		return (SET_ERROR(EINVAL));
5459
5460	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5461
5462	largeblockok = nvlist_exists(innvl, "largeblockok");
5463	embedok = nvlist_exists(innvl, "embedok");
5464
5465	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5466	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5467
5468#ifdef illumos
5469	file_t *fp = getf(fd);
5470#else
5471	fget_write(curthread, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
5472#endif
5473	if (fp == NULL)
5474		return (SET_ERROR(EBADF));
5475
5476	off = fp->f_offset;
5477	error = dmu_send(snapname, fromname, embedok, largeblockok, fd,
5478#ifdef illumos
5479	    resumeobj, resumeoff, fp->f_vnode, &off);
5480#else
5481	    resumeobj, resumeoff, fp, &off);
5482#endif
5483
5484#ifdef illumos
5485	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5486		fp->f_offset = off;
5487#else
5488	fp->f_offset = off;
5489#endif
5490
5491	releasef(fd);
5492	return (error);
5493}
5494
5495/*
5496 * Determine approximately how large a zfs send stream will be -- the number
5497 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5498 *
5499 * innvl: {
5500 *     (optional) "from" -> full snap or bookmark name to send an incremental
5501 *                          from
5502 * }
5503 *
5504 * outnvl: {
5505 *     "space" -> bytes of space (uint64)
5506 * }
5507 */
5508static int
5509zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5510{
5511	dsl_pool_t *dp;
5512	dsl_dataset_t *tosnap;
5513	int error;
5514	char *fromname;
5515	uint64_t space;
5516
5517	error = dsl_pool_hold(snapname, FTAG, &dp);
5518	if (error != 0)
5519		return (error);
5520
5521	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5522	if (error != 0) {
5523		dsl_pool_rele(dp, FTAG);
5524		return (error);
5525	}
5526
5527	error = nvlist_lookup_string(innvl, "from", &fromname);
5528	if (error == 0) {
5529		if (strchr(fromname, '@') != NULL) {
5530			/*
5531			 * If from is a snapshot, hold it and use the more
5532			 * efficient dmu_send_estimate to estimate send space
5533			 * size using deadlists.
5534			 */
5535			dsl_dataset_t *fromsnap;
5536			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5537			if (error != 0)
5538				goto out;
5539			error = dmu_send_estimate(tosnap, fromsnap, &space);
5540			dsl_dataset_rele(fromsnap, FTAG);
5541		} else if (strchr(fromname, '#') != NULL) {
5542			/*
5543			 * If from is a bookmark, fetch the creation TXG of the
5544			 * snapshot it was created from and use that to find
5545			 * blocks that were born after it.
5546			 */
5547			zfs_bookmark_phys_t frombm;
5548
5549			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5550			    &frombm);
5551			if (error != 0)
5552				goto out;
5553			error = dmu_send_estimate_from_txg(tosnap,
5554			    frombm.zbm_creation_txg, &space);
5555		} else {
5556			/*
5557			 * from is not properly formatted as a snapshot or
5558			 * bookmark
5559			 */
5560			error = SET_ERROR(EINVAL);
5561			goto out;
5562		}
5563	} else {
5564		// If estimating the size of a full send, use dmu_send_estimate
5565		error = dmu_send_estimate(tosnap, NULL, &space);
5566	}
5567
5568	fnvlist_add_uint64(outnvl, "space", space);
5569
5570out:
5571	dsl_dataset_rele(tosnap, FTAG);
5572	dsl_pool_rele(dp, FTAG);
5573	return (error);
5574}
5575
5576static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5577
5578static void
5579zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5580    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5581    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5582{
5583	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5584
5585	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5586	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5587	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5588	ASSERT3P(vec->zvec_func, ==, NULL);
5589
5590	vec->zvec_legacy_func = func;
5591	vec->zvec_secpolicy = secpolicy;
5592	vec->zvec_namecheck = namecheck;
5593	vec->zvec_allow_log = log_history;
5594	vec->zvec_pool_check = pool_check;
5595}
5596
5597/*
5598 * See the block comment at the beginning of this file for details on
5599 * each argument to this function.
5600 */
5601static void
5602zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5603    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5604    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5605    boolean_t allow_log)
5606{
5607	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5608
5609	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5610	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5611	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5612	ASSERT3P(vec->zvec_func, ==, NULL);
5613
5614	/* if we are logging, the name must be valid */
5615	ASSERT(!allow_log || namecheck != NO_NAME);
5616
5617	vec->zvec_name = name;
5618	vec->zvec_func = func;
5619	vec->zvec_secpolicy = secpolicy;
5620	vec->zvec_namecheck = namecheck;
5621	vec->zvec_pool_check = pool_check;
5622	vec->zvec_smush_outnvlist = smush_outnvlist;
5623	vec->zvec_allow_log = allow_log;
5624}
5625
5626static void
5627zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5628    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5629    zfs_ioc_poolcheck_t pool_check)
5630{
5631	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5632	    POOL_NAME, log_history, pool_check);
5633}
5634
5635static void
5636zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5637    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5638{
5639	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5640	    DATASET_NAME, B_FALSE, pool_check);
5641}
5642
5643static void
5644zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5645{
5646	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5647	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5648}
5649
5650static void
5651zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5652    zfs_secpolicy_func_t *secpolicy)
5653{
5654	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5655	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
5656}
5657
5658static void
5659zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5660    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5661{
5662	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5663	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5664}
5665
5666static void
5667zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5668{
5669	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5670	    zfs_secpolicy_read);
5671}
5672
5673static void
5674zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5675	zfs_secpolicy_func_t *secpolicy)
5676{
5677	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5678	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5679}
5680
5681static void
5682zfs_ioctl_init(void)
5683{
5684	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
5685	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
5686	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5687
5688	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
5689	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
5690	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
5691
5692	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
5693	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
5694	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5695
5696	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
5697	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
5698	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5699
5700	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
5701	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
5702	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5703
5704	zfs_ioctl_register("create", ZFS_IOC_CREATE,
5705	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
5706	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5707
5708	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
5709	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
5710	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5711
5712	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
5713	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
5714	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5715
5716	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
5717	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
5718	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5719	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
5720	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
5721	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5722
5723	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
5724	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
5725	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5726
5727	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
5728	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
5729	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
5730
5731	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
5732	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
5733	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5734
5735	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
5736	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
5737	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5738
5739	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
5740	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
5741	    POOL_NAME,
5742	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5743
5744	/* IOCTLS that use the legacy function signature */
5745
5746	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
5747	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
5748
5749	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
5750	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
5751	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
5752	    zfs_ioc_pool_scan);
5753	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
5754	    zfs_ioc_pool_upgrade);
5755	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
5756	    zfs_ioc_vdev_add);
5757	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
5758	    zfs_ioc_vdev_remove);
5759	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
5760	    zfs_ioc_vdev_set_state);
5761	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
5762	    zfs_ioc_vdev_attach);
5763	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
5764	    zfs_ioc_vdev_detach);
5765	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
5766	    zfs_ioc_vdev_setpath);
5767	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
5768	    zfs_ioc_vdev_setfru);
5769	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
5770	    zfs_ioc_pool_set_props);
5771	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
5772	    zfs_ioc_vdev_split);
5773	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
5774	    zfs_ioc_pool_reguid);
5775
5776	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
5777	    zfs_ioc_pool_configs, zfs_secpolicy_none);
5778	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
5779	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
5780	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
5781	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
5782	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
5783	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
5784	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
5785	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
5786
5787	/*
5788	 * pool destroy, and export don't log the history as part of
5789	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
5790	 * does the logging of those commands.
5791	 */
5792	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
5793	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
5794	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
5795	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
5796
5797	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
5798	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
5799	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
5800	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
5801
5802	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
5803	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_NONE);
5804	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
5805	    zfs_ioc_dsobj_to_dsname,
5806	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_NONE);
5807	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
5808	    zfs_ioc_pool_get_history,
5809	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
5810
5811	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
5812	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
5813
5814	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
5815	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
5816	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
5817	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
5818
5819	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
5820	    zfs_ioc_space_written);
5821	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
5822	    zfs_ioc_objset_recvd_props);
5823	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
5824	    zfs_ioc_next_obj);
5825	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
5826	    zfs_ioc_get_fsacl);
5827	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
5828	    zfs_ioc_objset_stats);
5829	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
5830	    zfs_ioc_objset_zplprops);
5831	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
5832	    zfs_ioc_dataset_list_next);
5833	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
5834	    zfs_ioc_snapshot_list_next);
5835	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
5836	    zfs_ioc_send_progress);
5837
5838	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
5839	    zfs_ioc_diff, zfs_secpolicy_diff);
5840	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
5841	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
5842	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
5843	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
5844	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
5845	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
5846	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
5847	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
5848	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
5849	    zfs_ioc_send, zfs_secpolicy_send);
5850
5851	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
5852	    zfs_secpolicy_none);
5853	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
5854	    zfs_secpolicy_destroy);
5855	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
5856	    zfs_secpolicy_rename);
5857	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
5858	    zfs_secpolicy_recv);
5859	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
5860	    zfs_secpolicy_promote);
5861	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
5862	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
5863	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
5864	    zfs_secpolicy_set_fsacl);
5865
5866	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
5867	    zfs_secpolicy_share, POOL_CHECK_NONE);
5868	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
5869	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
5870	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
5871	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
5872	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5873	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
5874	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
5875	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5876
5877#ifdef __FreeBSD__
5878	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
5879	    zfs_secpolicy_config, POOL_CHECK_NONE);
5880	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
5881	    zfs_secpolicy_config, POOL_CHECK_NONE);
5882#endif
5883}
5884
5885int
5886pool_status_check(const char *name, zfs_ioc_namecheck_t type,
5887    zfs_ioc_poolcheck_t check)
5888{
5889	spa_t *spa;
5890	int error;
5891
5892	ASSERT(type == POOL_NAME || type == DATASET_NAME);
5893
5894	if (check & POOL_CHECK_NONE)
5895		return (0);
5896
5897	error = spa_open(name, &spa, FTAG);
5898	if (error == 0) {
5899		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
5900			error = SET_ERROR(EAGAIN);
5901		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
5902			error = SET_ERROR(EROFS);
5903		spa_close(spa, FTAG);
5904	}
5905	return (error);
5906}
5907
5908/*
5909 * Find a free minor number.
5910 */
5911minor_t
5912zfsdev_minor_alloc(void)
5913{
5914	static minor_t last_minor;
5915	minor_t m;
5916
5917	ASSERT(MUTEX_HELD(&spa_namespace_lock));
5918
5919	for (m = last_minor + 1; m != last_minor; m++) {
5920		if (m > ZFSDEV_MAX_MINOR)
5921			m = 1;
5922		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
5923			last_minor = m;
5924			return (m);
5925		}
5926	}
5927
5928	return (0);
5929}
5930
5931static int
5932zfs_ctldev_init(struct cdev *devp)
5933{
5934	minor_t minor;
5935	zfs_soft_state_t *zs;
5936
5937	ASSERT(MUTEX_HELD(&spa_namespace_lock));
5938
5939	minor = zfsdev_minor_alloc();
5940	if (minor == 0)
5941		return (SET_ERROR(ENXIO));
5942
5943	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5944		return (SET_ERROR(EAGAIN));
5945
5946	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
5947
5948	zs = ddi_get_soft_state(zfsdev_state, minor);
5949	zs->zss_type = ZSST_CTLDEV;
5950	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5951
5952	return (0);
5953}
5954
5955static void
5956zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
5957{
5958	ASSERT(MUTEX_HELD(&spa_namespace_lock));
5959
5960	zfs_onexit_destroy(zo);
5961	ddi_soft_state_free(zfsdev_state, minor);
5962}
5963
5964void *
5965zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5966{
5967	zfs_soft_state_t *zp;
5968
5969	zp = ddi_get_soft_state(zfsdev_state, minor);
5970	if (zp == NULL || zp->zss_type != which)
5971		return (NULL);
5972
5973	return (zp->zss_data);
5974}
5975
5976static int
5977zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
5978{
5979	int error = 0;
5980
5981#ifdef sun
5982	if (getminor(*devp) != 0)
5983		return (zvol_open(devp, flag, otyp, cr));
5984#endif
5985
5986	/* This is the control device. Allocate a new minor if requested. */
5987	if (flag & FEXCL) {
5988		mutex_enter(&spa_namespace_lock);
5989		error = zfs_ctldev_init(devp);
5990		mutex_exit(&spa_namespace_lock);
5991	}
5992
5993	return (error);
5994}
5995
5996static void
5997zfsdev_close(void *data)
5998{
5999	zfs_onexit_t *zo;
6000	minor_t minor = (minor_t)(uintptr_t)data;
6001
6002	if (minor == 0)
6003		return;
6004
6005	mutex_enter(&spa_namespace_lock);
6006	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6007	if (zo == NULL) {
6008		mutex_exit(&spa_namespace_lock);
6009		return;
6010	}
6011	zfs_ctldev_destroy(zo, minor);
6012	mutex_exit(&spa_namespace_lock);
6013}
6014
6015static int
6016zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
6017    struct thread *td)
6018{
6019	zfs_cmd_t *zc;
6020	uint_t vecnum;
6021	int error, rc, len;
6022#ifdef illumos
6023	minor_t minor = getminor(dev);
6024#else
6025	zfs_iocparm_t *zc_iocparm;
6026	int cflag, cmd, oldvecnum;
6027	boolean_t newioc, compat;
6028	void *compat_zc = NULL;
6029	cred_t *cr = td->td_ucred;
6030#endif
6031	const zfs_ioc_vec_t *vec;
6032	char *saved_poolname = NULL;
6033	nvlist_t *innvl = NULL;
6034
6035	cflag = ZFS_CMD_COMPAT_NONE;
6036	compat = B_FALSE;
6037	newioc = B_TRUE;	/* "new" style (zfs_iocparm_t) ioctl */
6038
6039	len = IOCPARM_LEN(zcmd);
6040	vecnum = cmd = zcmd & 0xff;
6041
6042	/*
6043	 * Check if we are talking to supported older binaries
6044	 * and translate zfs_cmd if necessary
6045	 */
6046	if (len != sizeof(zfs_iocparm_t)) {
6047		newioc = B_FALSE;
6048		compat = B_TRUE;
6049
6050		vecnum = cmd;
6051
6052		switch (len) {
6053		case sizeof(zfs_cmd_zcmd_t):
6054			cflag = ZFS_CMD_COMPAT_LZC;
6055			break;
6056		case sizeof(zfs_cmd_deadman_t):
6057			cflag = ZFS_CMD_COMPAT_DEADMAN;
6058			break;
6059		case sizeof(zfs_cmd_v28_t):
6060			cflag = ZFS_CMD_COMPAT_V28;
6061			break;
6062		case sizeof(zfs_cmd_v15_t):
6063			cflag = ZFS_CMD_COMPAT_V15;
6064			vecnum = zfs_ioctl_v15_to_v28[cmd];
6065
6066			/*
6067			 * Return without further handling
6068			 * if the command is blacklisted.
6069			 */
6070			if (vecnum == ZFS_IOC_COMPAT_PASS)
6071				return (0);
6072			else if (vecnum == ZFS_IOC_COMPAT_FAIL)
6073				return (ENOTSUP);
6074			break;
6075		default:
6076			return (EINVAL);
6077		}
6078	}
6079
6080#ifdef illumos
6081	vecnum = cmd - ZFS_IOC_FIRST;
6082	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
6083#endif
6084
6085	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6086		return (SET_ERROR(EINVAL));
6087	vec = &zfs_ioc_vec[vecnum];
6088
6089	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
6090
6091#ifdef illumos
6092	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6093	if (error != 0) {
6094		error = SET_ERROR(EFAULT);
6095		goto out;
6096	}
6097#else	/* !illumos */
6098	bzero(zc, sizeof(zfs_cmd_t));
6099
6100	if (newioc) {
6101		zc_iocparm = (void *)arg;
6102
6103		switch (zc_iocparm->zfs_ioctl_version) {
6104		case ZFS_IOCVER_CURRENT:
6105			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
6106				error = SET_ERROR(EINVAL);
6107				goto out;
6108			}
6109			break;
6110		case ZFS_IOCVER_EDBP:
6111			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_edbp_t)) {
6112				error = SET_ERROR(EFAULT);
6113				goto out;
6114			}
6115			compat = B_TRUE;
6116			cflag = ZFS_CMD_COMPAT_EDBP;
6117			break;
6118		case ZFS_IOCVER_ZCMD:
6119			if (zc_iocparm->zfs_cmd_size > sizeof(zfs_cmd_t) ||
6120			    zc_iocparm->zfs_cmd_size < sizeof(zfs_cmd_zcmd_t)) {
6121				error = SET_ERROR(EFAULT);
6122				goto out;
6123			}
6124			compat = B_TRUE;
6125			cflag = ZFS_CMD_COMPAT_ZCMD;
6126			break;
6127		default:
6128			error = SET_ERROR(EINVAL);
6129			goto out;
6130			/* NOTREACHED */
6131		}
6132
6133		if (compat) {
6134			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
6135			compat_zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
6136			bzero(compat_zc, sizeof(zfs_cmd_t));
6137
6138			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
6139			    compat_zc, zc_iocparm->zfs_cmd_size, flag);
6140			if (error != 0) {
6141				error = SET_ERROR(EFAULT);
6142				goto out;
6143			}
6144		} else {
6145			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
6146			    zc, zc_iocparm->zfs_cmd_size, flag);
6147			if (error != 0) {
6148				error = SET_ERROR(EFAULT);
6149				goto out;
6150			}
6151		}
6152	}
6153
6154	if (compat) {
6155		if (newioc) {
6156			ASSERT(compat_zc != NULL);
6157			zfs_cmd_compat_get(zc, compat_zc, cflag);
6158		} else {
6159			ASSERT(compat_zc == NULL);
6160			zfs_cmd_compat_get(zc, arg, cflag);
6161		}
6162		oldvecnum = vecnum;
6163		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
6164		if (error != 0)
6165			goto out;
6166		if (oldvecnum != vecnum)
6167			vec = &zfs_ioc_vec[vecnum];
6168	}
6169#endif	/* !illumos */
6170
6171	zc->zc_iflags = flag & FKIOCTL;
6172	if (zc->zc_nvlist_src_size != 0) {
6173		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6174		    zc->zc_iflags, &innvl);
6175		if (error != 0)
6176			goto out;
6177	}
6178
6179	/* rewrite innvl for backwards compatibility */
6180	if (compat)
6181		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);
6182
6183	/*
6184	 * Ensure that all pool/dataset names are valid before we pass down to
6185	 * the lower layers.
6186	 */
6187	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6188	switch (vec->zvec_namecheck) {
6189	case POOL_NAME:
6190		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6191			error = SET_ERROR(EINVAL);
6192		else
6193			error = pool_status_check(zc->zc_name,
6194			    vec->zvec_namecheck, vec->zvec_pool_check);
6195		break;
6196
6197	case DATASET_NAME:
6198		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6199			error = SET_ERROR(EINVAL);
6200		else
6201			error = pool_status_check(zc->zc_name,
6202			    vec->zvec_namecheck, vec->zvec_pool_check);
6203		break;
6204
6205	case NO_NAME:
6206		break;
6207	}
6208
6209	if (error == 0 && !(flag & FKIOCTL))
6210		error = vec->zvec_secpolicy(zc, innvl, cr);
6211
6212	if (error != 0)
6213		goto out;
6214
6215	/* legacy ioctls can modify zc_name */
6216	len = strcspn(zc->zc_name, "/@#") + 1;
6217	saved_poolname = kmem_alloc(len, KM_SLEEP);
6218	(void) strlcpy(saved_poolname, zc->zc_name, len);
6219
6220	if (vec->zvec_func != NULL) {
6221		nvlist_t *outnvl;
6222		int puterror = 0;
6223		spa_t *spa;
6224		nvlist_t *lognv = NULL;
6225
6226		ASSERT(vec->zvec_legacy_func == NULL);
6227
6228		/*
6229		 * Add the innvl to the lognv before calling the func,
6230		 * in case the func changes the innvl.
6231		 */
6232		if (vec->zvec_allow_log) {
6233			lognv = fnvlist_alloc();
6234			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6235			    vec->zvec_name);
6236			if (!nvlist_empty(innvl)) {
6237				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6238				    innvl);
6239			}
6240		}
6241
6242		outnvl = fnvlist_alloc();
6243		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6244
6245		if (error == 0 && vec->zvec_allow_log &&
6246		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
6247			if (!nvlist_empty(outnvl)) {
6248				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6249				    outnvl);
6250			}
6251			(void) spa_history_log_nvl(spa, lognv);
6252			spa_close(spa, FTAG);
6253		}
6254		fnvlist_free(lognv);
6255
6256		/* rewrite outnvl for backwards compatibility */
6257		if (compat)
6258			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
6259			    cflag);
6260
6261		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6262			int smusherror = 0;
6263			if (vec->zvec_smush_outnvlist) {
6264				smusherror = nvlist_smush(outnvl,
6265				    zc->zc_nvlist_dst_size);
6266			}
6267			if (smusherror == 0)
6268				puterror = put_nvlist(zc, outnvl);
6269		}
6270
6271		if (puterror != 0)
6272			error = puterror;
6273
6274		nvlist_free(outnvl);
6275	} else {
6276		error = vec->zvec_legacy_func(zc);
6277	}
6278
6279out:
6280	nvlist_free(innvl);
6281
6282#ifdef illumos
6283	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6284	if (error == 0 && rc != 0)
6285		error = SET_ERROR(EFAULT);
6286#else
6287	if (compat) {
6288		zfs_ioctl_compat_post(zc, cmd, cflag);
6289		if (newioc) {
6290			ASSERT(compat_zc != NULL);
6291			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
6292
6293			zfs_cmd_compat_put(zc, compat_zc, vecnum, cflag);
6294			rc = ddi_copyout(compat_zc,
6295			    (void *)(uintptr_t)zc_iocparm->zfs_cmd,
6296			    zc_iocparm->zfs_cmd_size, flag);
6297			if (error == 0 && rc != 0)
6298				error = SET_ERROR(EFAULT);
6299			kmem_free(compat_zc, sizeof (zfs_cmd_t));
6300		} else {
6301			zfs_cmd_compat_put(zc, arg, vecnum, cflag);
6302		}
6303	} else {
6304		ASSERT(newioc);
6305
6306		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
6307		    sizeof (zfs_cmd_t), flag);
6308		if (error == 0 && rc != 0)
6309			error = SET_ERROR(EFAULT);
6310	}
6311#endif
6312	if (error == 0 && vec->zvec_allow_log) {
6313		char *s = tsd_get(zfs_allow_log_key);
6314		if (s != NULL)
6315			strfree(s);
6316		(void) tsd_set(zfs_allow_log_key, saved_poolname);
6317	} else {
6318		if (saved_poolname != NULL)
6319			strfree(saved_poolname);
6320	}
6321
6322	kmem_free(zc, sizeof (zfs_cmd_t));
6323	return (error);
6324}
6325
6326#ifdef sun
6327static int
6328zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6329{
6330	if (cmd != DDI_ATTACH)
6331		return (DDI_FAILURE);
6332
6333	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6334	    DDI_PSEUDO, 0) == DDI_FAILURE)
6335		return (DDI_FAILURE);
6336
6337	zfs_dip = dip;
6338
6339	ddi_report_dev(dip);
6340
6341	return (DDI_SUCCESS);
6342}
6343
6344static int
6345zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6346{
6347	if (spa_busy() || zfs_busy() || zvol_busy())
6348		return (DDI_FAILURE);
6349
6350	if (cmd != DDI_DETACH)
6351		return (DDI_FAILURE);
6352
6353	zfs_dip = NULL;
6354
6355	ddi_prop_remove_all(dip);
6356	ddi_remove_minor_node(dip, NULL);
6357
6358	return (DDI_SUCCESS);
6359}
6360
6361/*ARGSUSED*/
6362static int
6363zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6364{
6365	switch (infocmd) {
6366	case DDI_INFO_DEVT2DEVINFO:
6367		*result = zfs_dip;
6368		return (DDI_SUCCESS);
6369
6370	case DDI_INFO_DEVT2INSTANCE:
6371		*result = (void *)0;
6372		return (DDI_SUCCESS);
6373	}
6374
6375	return (DDI_FAILURE);
6376}
6377#endif	/* sun */
6378
6379/*
6380 * OK, so this is a little weird.
6381 *
6382 * /dev/zfs is the control node, i.e. minor 0.
6383 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
6384 *
6385 * /dev/zfs has basically nothing to do except serve up ioctls,
6386 * so most of the standard driver entry points are in zvol.c.
6387 */
6388#ifdef sun
/*
 * Character/block entry points used by the Solaris ("sun") build only.
 * The initializer is positional; each slot is labelled on its own line.
 * Open, close and ioctl are handled in this file, the data path entries
 * (strategy, dump, read, write) are the zvol_* routines.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
6409
/*
 * Device operations for the Solaris ("sun") build only.  The initializer
 * is positional; it wires up zfs_info/zfs_attach/zfs_detach from this file
 * and points at zfs_cb_ops for the character/block entry points.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
6424
/*
 * Driver linkage record (Solaris build only): driver module ops, the
 * descriptive name "ZFS storage pool" and the device operations above.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
6430
/*
 * Module linkage (Solaris build only), passed to mod_install(),
 * mod_remove() and mod_info().  It lists zfs_modlfs and zfs_modldrv,
 * followed by a NULL terminator.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
6437#endif	/* sun */
6438
6439static struct cdevsw zfs_cdevsw = {
6440	.d_version =	D_VERSION,
6441	.d_open =	zfsdev_open,
6442	.d_ioctl =	zfsdev_ioctl,
6443	.d_name =	ZFS_DEV_NAME
6444};
6445
/*
 * Destructor registered for zfs_allow_log_key: 'arg' is the stored pool
 * name string, which is released with strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{

	strfree((char *)arg);
}
6452
6453static void
6454zfsdev_init(void)
6455{
6456	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
6457	    ZFS_DEV_NAME);
6458}
6459
6460static void
6461zfsdev_fini(void)
6462{
6463	if (zfsdev != NULL)
6464		destroy_dev(zfsdev);
6465}
6466
/* Root-mount hold taken with root_mount_hold() and released in zfs__init(). */
static struct root_hold_token *zfs_root_token;
/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * kernel process that owns ZFS threads - confirm against its users.
 */
struct proc *zfsproc;
6469
6470#ifdef sun
6471int
6472_init(void)
6473{
6474	int error;
6475
6476	spa_init(FREAD | FWRITE);
6477	zfs_init();
6478	zvol_init();
6479	zfs_ioctl_init();
6480
6481	if ((error = mod_install(&modlinkage)) != 0) {
6482		zvol_fini();
6483		zfs_fini();
6484		spa_fini();
6485		return (error);
6486	}
6487
6488	tsd_create(&zfs_fsyncer_key, NULL);
6489	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6490	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6491
6492	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6493	ASSERT(error == 0);
6494	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6495
6496	return (0);
6497}
6498
6499int
6500_fini(void)
6501{
6502	int error;
6503
6504	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6505		return (SET_ERROR(EBUSY));
6506
6507	if ((error = mod_remove(&modlinkage)) != 0)
6508		return (error);
6509
6510	zvol_fini();
6511	zfs_fini();
6512	spa_fini();
6513	if (zfs_nfsshare_inited)
6514		(void) ddi_modclose(nfs_mod);
6515	if (zfs_smbshare_inited)
6516		(void) ddi_modclose(smbsrv_mod);
6517	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6518		(void) ddi_modclose(sharefs_mod);
6519
6520	tsd_destroy(&zfs_fsyncer_key);
6521	ldi_ident_release(zfs_li);
6522	zfs_li = NULL;
6523	mutex_destroy(&zfs_share_lock);
6524
6525	return (error);
6526}
6527
6528int
6529_info(struct modinfo *modinfop)
6530{
6531	return (mod_info(&modlinkage, modinfop));
6532}
6533#endif	/* sun */
6534
/* Forward declarations for the module glue defined below. */
static int zfs__init(void);
static int zfs__fini(void);
static void zfs_shutdown(void *, int);

/*
 * Tag returned by EVENTHANDLER_REGISTER() in zfs_modevent(); it is needed
 * to deregister the shutdown handler on unload.
 */
static eventhandler_tag zfs_shutdown_event_tag;

/* zfs__init() prints a warning when KSTACK_PAGES is below this value. */
#define ZFS_MIN_KSTACK_PAGES 4
6542
6543int
6544zfs__init(void)
6545{
6546
6547#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
6548	printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
6549	    "overflow panic!\nPlease consider adding "
6550	    "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
6551	    ZFS_MIN_KSTACK_PAGES);
6552#endif
6553	zfs_root_token = root_mount_hold("ZFS");
6554
6555	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6556
6557	spa_init(FREAD | FWRITE);
6558	zfs_init();
6559	zvol_init();
6560	zfs_ioctl_init();
6561
6562	tsd_create(&zfs_fsyncer_key, NULL);
6563	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6564	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6565
6566	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
6567	root_mount_rel(zfs_root_token);
6568
6569	zfsdev_init();
6570
6571	return (0);
6572}
6573
6574int
6575zfs__fini(void)
6576{
6577	if (spa_busy() || zfs_busy() || zvol_busy() ||
6578	    zio_injection_enabled) {
6579		return (EBUSY);
6580	}
6581
6582	zfsdev_fini();
6583	zvol_fini();
6584	zfs_fini();
6585	spa_fini();
6586
6587	tsd_destroy(&zfs_fsyncer_key);
6588	tsd_destroy(&rrw_tsd_key);
6589	tsd_destroy(&zfs_allow_log_key);
6590
6591	mutex_destroy(&zfs_share_lock);
6592
6593	return (0);
6594}
6595
6596static void
6597zfs_shutdown(void *arg __unused, int howto __unused)
6598{
6599
6600	/*
6601	 * ZFS fini routines can not properly work in a panic-ed system.
6602	 */
6603	if (panicstr == NULL)
6604		(void)zfs__fini();
6605}
6606
6607
6608static int
6609zfs_modevent(module_t mod, int type, void *unused __unused)
6610{
6611	int err;
6612
6613	switch (type) {
6614	case MOD_LOAD:
6615		err = zfs__init();
6616		if (err == 0)
6617			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
6618			    shutdown_post_sync, zfs_shutdown, NULL,
6619			    SHUTDOWN_PRI_FIRST);
6620		return (err);
6621	case MOD_UNLOAD:
6622		err = zfs__fini();
6623		if (err == 0 && zfs_shutdown_event_tag != NULL)
6624			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
6625			    zfs_shutdown_event_tag);
6626		return (err);
6627	case MOD_SHUTDOWN:
6628		return (0);
6629	default:
6630		break;
6631	}
6632	return (EOPNOTSUPP);
6633}
6634
6635static moduledata_t zfs_mod = {
6636	"zfsctrl",
6637	zfs_modevent,
6638	0
6639};
6640DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
6641MODULE_VERSION(zfsctrl, 1);
6642MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
6643MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
6644MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
6645