1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 Martin Matuska
25 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek
27 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
28 * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
29 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
36 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
37 * Copyright 2017 RackTop Systems.
38 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
39 * Copyright (c) 2019 Datto Inc.
40 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
41 * Copyright (c) 2019, 2021, 2024, Klara Inc.
42 * Copyright (c) 2019, Allan Jude
43 * Copyright 2024 Oxide Computer Company
44 */
45
46/*
47 * ZFS ioctls.
48 *
49 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
50 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
51 *
52 * There are two ways that we handle ioctls: the legacy way where almost
53 * all of the logic is in the ioctl callback, and the new way where most
54 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
55 *
56 * Non-legacy ioctls should be registered by calling
57 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
58 * from userland by lzc_ioctl().
59 *
60 * The registration arguments are as follows:
61 *
62 * const char *name
63 *   The name of the ioctl.  This is used for history logging.  If the
64 *   ioctl returns successfully (the callback returns 0), and allow_log
65 *   is true, then a history log entry will be recorded with the input &
66 *   output nvlists.  The log entry can be printed with "zpool history -i".
67 *
68 * zfs_ioc_t ioc
69 *   The ioctl request number, which userland will pass to ioctl(2).
70 *   We want newer versions of libzfs and libzfs_core to run against
71 *   existing zfs kernel modules (i.e. a deferred reboot after an update).
72 *   Therefore the ioctl numbers cannot change from release to release.
73 *
74 * zfs_secpolicy_func_t *secpolicy
75 *   This function will be called before the zfs_ioc_func_t, to
76 *   determine if this operation is permitted.  It should return EPERM
77 *   on failure, and 0 on success.  Checks include determining if the
78 *   dataset is visible in this zone, and if the user has either all
79 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
80 *   to do this operation on this dataset with "zfs allow".
81 *
82 * zfs_ioc_namecheck_t namecheck
83 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
84 *   name, a dataset name, or nothing.  If the name is not well-formed,
85 *   the ioctl will fail and the callback will not be called.
86 *   Therefore, the callback can assume that the name is well-formed
87 *   (e.g. is null-terminated, doesn't have more than one '@' character,
88 *   doesn't have invalid characters).
89 *
90 * zfs_ioc_poolcheck_t pool_check
91 *   This specifies requirements on the pool state.  If the pool does
92 *   not meet them (is suspended or is readonly), the ioctl will fail
93 *   and the callback will not be called.  If any checks are specified
94 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
95 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
96 *   POOL_CHECK_READONLY).
97 *
98 * zfs_ioc_key_t *nvl_keys
99 *  The list of expected/allowable innvl input keys. This list is used
100 *  to validate the nvlist input to the ioctl.
101 *
102 * boolean_t smush_outnvlist
103 *   If smush_outnvlist is true, then the output is presumed to be a
104 *   list of errors, and it will be "smushed" down to fit into the
105 *   caller's buffer, by removing some entries and replacing them with a
106 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
107 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
108 *   outnvlist does not fit into the userland-provided buffer, then the
109 *   ioctl will fail with ENOMEM.
110 *
111 * zfs_ioc_func_t *func
112 *   The callback function that will perform the operation.
113 *
114 *   The callback should return 0 on success, or an error number on
115 *   failure.  If the function fails, the userland ioctl will return -1,
116 *   and errno will be set to the callback's return value.  The callback
117 *   will be called with the following arguments:
118 *
119 *   const char *name
120 *     The name of the pool or dataset to operate on, from
121 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
122 *     expected type (pool, dataset, or none).
123 *
124 *   nvlist_t *innvl
125 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
126 *     NULL if no input nvlist was provided.  Changes to this nvlist are
127 *     ignored.  If the input nvlist could not be deserialized, the
128 *     ioctl will fail and the callback will not be called.
129 *
130 *   nvlist_t *outnvl
131 *     The output nvlist, initially empty.  The callback can fill it in,
132 *     and it will be returned to userland by serializing it into
133 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
134 *     fails (e.g. because the caller didn't supply a large enough
135 *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
137 *
138 *     There are two typical uses of the output nvlist:
139 *       - To return state, e.g. property values.  In this case,
140 *         smush_outnvlist should be false.  If the buffer was not large
141 *         enough, the caller will reallocate a larger buffer and try
142 *         the ioctl again.
143 *
144 *       - To return multiple errors from an ioctl which makes on-disk
145 *         changes.  In this case, smush_outnvlist should be true.
146 *         Ioctls which make on-disk modifications should generally not
147 *         use the outnvl if they succeed, because the caller can not
148 *         distinguish between the operation failing, and
149 *         deserialization failing.
150 *
151 * IOCTL Interface Errors
152 *
153 * The following ioctl input errors can be returned:
154 *   ZFS_ERR_IOC_CMD_UNAVAIL	the ioctl number is not supported by kernel
155 *   ZFS_ERR_IOC_ARG_UNAVAIL	an input argument is not supported by kernel
156 *   ZFS_ERR_IOC_ARG_REQUIRED	a required input argument is missing
157 *   ZFS_ERR_IOC_ARG_BADTYPE	an input argument has an invalid type
158 */
159
160#include <sys/types.h>
161#include <sys/param.h>
162#include <sys/errno.h>
163#include <sys/uio_impl.h>
164#include <sys/file.h>
165#include <sys/kmem.h>
166#include <sys/cmn_err.h>
167#include <sys/stat.h>
168#include <sys/zfs_ioctl.h>
169#include <sys/zfs_quota.h>
170#include <sys/zfs_vfsops.h>
171#include <sys/zfs_znode.h>
172#include <sys/zap.h>
173#include <sys/spa.h>
174#include <sys/spa_impl.h>
175#include <sys/vdev.h>
176#include <sys/vdev_impl.h>
177#include <sys/dmu.h>
178#include <sys/dsl_dir.h>
179#include <sys/dsl_dataset.h>
180#include <sys/dsl_prop.h>
181#include <sys/dsl_deleg.h>
182#include <sys/dmu_objset.h>
183#include <sys/dmu_impl.h>
184#include <sys/dmu_redact.h>
185#include <sys/dmu_tx.h>
186#include <sys/sunddi.h>
187#include <sys/policy.h>
188#include <sys/zone.h>
189#include <sys/nvpair.h>
190#include <sys/pathname.h>
191#include <sys/fs/zfs.h>
192#include <sys/zfs_ctldir.h>
193#include <sys/zfs_dir.h>
194#include <sys/zfs_onexit.h>
195#include <sys/zvol.h>
196#include <sys/dsl_scan.h>
197#include <sys/fm/util.h>
198#include <sys/dsl_crypt.h>
199#include <sys/rrwlock.h>
200#include <sys/zfs_file.h>
201
202#include <sys/dmu_recv.h>
203#include <sys/dmu_send.h>
204#include <sys/dmu_recv.h>
205#include <sys/dsl_destroy.h>
206#include <sys/dsl_bookmark.h>
207#include <sys/dsl_userhold.h>
208#include <sys/zfeature.h>
209#include <sys/zcp.h>
210#include <sys/zio_checksum.h>
211#include <sys/vdev_removal.h>
212#include <sys/vdev_impl.h>
213#include <sys/vdev_initialize.h>
214#include <sys/vdev_trim.h>
215
216#include "zfs_namecheck.h"
217#include "zfs_prop.h"
218#include "zfs_deleg.h"
219#include "zfs_comutil.h"
220
221#include <sys/lua/lua.h>
222#include <sys/lua/lauxlib.h>
223#include <sys/zfs_ioctl_impl.h>
224
225kmutex_t zfsdev_state_lock;
226static zfsdev_state_t zfsdev_state_listhead;
227
228/*
229 * Limit maximum nvlist size.  We don't want users passing in insane values
230 * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
231 * Defaults to 0=auto which is handled by platform code.
232 */
233uint64_t zfs_max_nvlist_src_size = 0;
234
235/*
236 * When logging the output nvlist of an ioctl in the on-disk history, limit
237 * the logged size to this many bytes.  This must be less than DMU_MAX_ACCESS.
238 * This applies primarily to zfs_ioc_channel_program().
239 */
240static uint64_t zfs_history_output_max = 1024 * 1024;
241
242uint_t zfs_allow_log_key;
243
244/* DATA_TYPE_ANY is used when zkey_type can vary. */
245#define	DATA_TYPE_ANY	DATA_TYPE_UNKNOWN
246
247typedef struct zfs_ioc_vec {
248	zfs_ioc_legacy_func_t	*zvec_legacy_func;
249	zfs_ioc_func_t		*zvec_func;
250	zfs_secpolicy_func_t	*zvec_secpolicy;
251	zfs_ioc_namecheck_t	zvec_namecheck;
252	boolean_t		zvec_allow_log;
253	zfs_ioc_poolcheck_t	zvec_pool_check;
254	boolean_t		zvec_smush_outnvlist;
255	const char		*zvec_name;
256	const zfs_ioc_key_t	*zvec_nvl_keys;
257	size_t			zvec_nvl_key_count;
258} zfs_ioc_vec_t;
259
260/* This array is indexed by zfs_userquota_prop_t */
261static const char *userquota_perms[] = {
262	ZFS_DELEG_PERM_USERUSED,
263	ZFS_DELEG_PERM_USERQUOTA,
264	ZFS_DELEG_PERM_GROUPUSED,
265	ZFS_DELEG_PERM_GROUPQUOTA,
266	ZFS_DELEG_PERM_USEROBJUSED,
267	ZFS_DELEG_PERM_USEROBJQUOTA,
268	ZFS_DELEG_PERM_GROUPOBJUSED,
269	ZFS_DELEG_PERM_GROUPOBJQUOTA,
270	ZFS_DELEG_PERM_PROJECTUSED,
271	ZFS_DELEG_PERM_PROJECTQUOTA,
272	ZFS_DELEG_PERM_PROJECTOBJUSED,
273	ZFS_DELEG_PERM_PROJECTOBJQUOTA,
274};
275
276static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
277static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
278static int zfs_check_settable(const char *name, nvpair_t *property,
279    cred_t *cr);
280static int zfs_check_clearable(const char *dataset, nvlist_t *props,
281    nvlist_t **errors);
282static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
283    boolean_t *);
284int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
285static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
286
287static void
288history_str_free(char *buf)
289{
290	kmem_free(buf, HIS_MAX_RECORD_LEN);
291}
292
293static char *
294history_str_get(zfs_cmd_t *zc)
295{
296	char *buf;
297
298	if (zc->zc_history == 0)
299		return (NULL);
300
301	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
302	if (copyinstr((void *)(uintptr_t)zc->zc_history,
303	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
304		history_str_free(buf);
305		return (NULL);
306	}
307
308	buf[HIS_MAX_RECORD_LEN -1] = '\0';
309
310	return (buf);
311}
312
313/*
314 * Return non-zero if the spa version is less than requested version.
315 */
316static int
317zfs_earlier_version(const char *name, int version)
318{
319	spa_t *spa;
320
321	if (spa_open(name, &spa, FTAG) == 0) {
322		if (spa_version(spa) < version) {
323			spa_close(spa, FTAG);
324			return (1);
325		}
326		spa_close(spa, FTAG);
327	}
328	return (0);
329}
330
331/*
332 * Return TRUE if the ZPL version is less than requested version.
333 */
334static boolean_t
335zpl_earlier_version(const char *name, int version)
336{
337	objset_t *os;
338	boolean_t rc = B_TRUE;
339
340	if (dmu_objset_hold(name, FTAG, &os) == 0) {
341		uint64_t zplversion;
342
343		if (dmu_objset_type(os) != DMU_OST_ZFS) {
344			dmu_objset_rele(os, FTAG);
345			return (B_TRUE);
346		}
347		/* XXX reading from non-owned objset */
348		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
349			rc = zplversion < version;
350		dmu_objset_rele(os, FTAG);
351	}
352	return (rc);
353}
354
355static void
356zfs_log_history(zfs_cmd_t *zc)
357{
358	spa_t *spa;
359	char *buf;
360
361	if ((buf = history_str_get(zc)) == NULL)
362		return;
363
364	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
365		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
366			(void) spa_history_log(spa, buf);
367		spa_close(spa, FTAG);
368	}
369	history_str_free(buf);
370}
371
372/*
373 * Policy for top-level read operations (list pools).  Requires no privileges,
374 * and can be used in the local zone, as there is no associated dataset.
375 */
376static int
377zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
378{
379	(void) zc, (void) innvl, (void) cr;
380	return (0);
381}
382
383/*
384 * Policy for dataset read operations (list children, get statistics).  Requires
385 * no privileges, but must be visible in the local zone.
386 */
387static int
388zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
389{
390	(void) innvl, (void) cr;
391	if (INGLOBALZONE(curproc) ||
392	    zone_dataset_visible(zc->zc_name, NULL))
393		return (0);
394
395	return (SET_ERROR(ENOENT));
396}
397
398static int
399zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
400{
401	int writable = 1;
402
403	/*
404	 * The dataset must be visible by this zone -- check this first
405	 * so they don't see EPERM on something they shouldn't know about.
406	 */
407	if (!INGLOBALZONE(curproc) &&
408	    !zone_dataset_visible(dataset, &writable))
409		return (SET_ERROR(ENOENT));
410
411	if (INGLOBALZONE(curproc)) {
412		/*
413		 * If the fs is zoned, only root can access it from the
414		 * global zone.
415		 */
416		if (secpolicy_zfs(cr) && zoned)
417			return (SET_ERROR(EPERM));
418	} else {
419		/*
420		 * If we are in a local zone, the 'zoned' property must be set.
421		 */
422		if (!zoned)
423			return (SET_ERROR(EPERM));
424
425		/* must be writable by this zone */
426		if (!writable)
427			return (SET_ERROR(EPERM));
428	}
429	return (0);
430}
431
432static int
433zfs_dozonecheck(const char *dataset, cred_t *cr)
434{
435	uint64_t zoned;
436
437	if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
438	    &zoned, NULL))
439		return (SET_ERROR(ENOENT));
440
441	return (zfs_dozonecheck_impl(dataset, zoned, cr));
442}
443
444static int
445zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
446{
447	uint64_t zoned;
448
449	if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
450		return (SET_ERROR(ENOENT));
451
452	return (zfs_dozonecheck_impl(dataset, zoned, cr));
453}
454
455static int
456zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
457    const char *perm, cred_t *cr)
458{
459	int error;
460
461	error = zfs_dozonecheck_ds(name, ds, cr);
462	if (error == 0) {
463		error = secpolicy_zfs(cr);
464		if (error != 0)
465			error = dsl_deleg_access_impl(ds, perm, cr);
466	}
467	return (error);
468}
469
470static int
471zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
472{
473	int error;
474	dsl_dataset_t *ds;
475	dsl_pool_t *dp;
476
477	/*
478	 * First do a quick check for root in the global zone, which
479	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
480	 * will get to handle nonexistent datasets.
481	 */
482	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
483		return (0);
484
485	error = dsl_pool_hold(name, FTAG, &dp);
486	if (error != 0)
487		return (error);
488
489	error = dsl_dataset_hold(dp, name, FTAG, &ds);
490	if (error != 0) {
491		dsl_pool_rele(dp, FTAG);
492		return (error);
493	}
494
495	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
496
497	dsl_dataset_rele(ds, FTAG);
498	dsl_pool_rele(dp, FTAG);
499	return (error);
500}
501
502/*
503 * Policy for setting the security label property.
504 *
505 * Returns 0 for success, non-zero for access and other errors.
506 */
507static int
508zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
509{
510#ifdef HAVE_MLSLABEL
511	char		ds_hexsl[MAXNAMELEN];
512	bslabel_t	ds_sl, new_sl;
513	boolean_t	new_default = FALSE;
514	uint64_t	zoned;
515	int		needed_priv = -1;
516	int		error;
517
518	/* First get the existing dataset label. */
519	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
520	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
521	if (error != 0)
522		return (SET_ERROR(EPERM));
523
524	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
525		new_default = TRUE;
526
527	/* The label must be translatable */
528	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
529		return (SET_ERROR(EINVAL));
530
531	/*
532	 * In a non-global zone, disallow attempts to set a label that
533	 * doesn't match that of the zone; otherwise no other checks
534	 * are needed.
535	 */
536	if (!INGLOBALZONE(curproc)) {
537		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
538			return (SET_ERROR(EPERM));
539		return (0);
540	}
541
542	/*
543	 * For global-zone datasets (i.e., those whose zoned property is
544	 * "off", verify that the specified new label is valid for the
545	 * global zone.
546	 */
547	if (dsl_prop_get_integer(name,
548	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
549		return (SET_ERROR(EPERM));
550	if (!zoned) {
551		if (zfs_check_global_label(name, strval) != 0)
552			return (SET_ERROR(EPERM));
553	}
554
555	/*
556	 * If the existing dataset label is nondefault, check if the
557	 * dataset is mounted (label cannot be changed while mounted).
558	 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
559	 * mounted (or isn't a dataset, doesn't exist, ...).
560	 */
561	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
562		objset_t *os;
563		static const char *setsl_tag = "setsl_tag";
564
565		/*
566		 * Try to own the dataset; abort if there is any error,
567		 * (e.g., already mounted, in use, or other error).
568		 */
569		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
570		    setsl_tag, &os);
571		if (error != 0)
572			return (SET_ERROR(EPERM));
573
574		dmu_objset_disown(os, B_TRUE, setsl_tag);
575
576		if (new_default) {
577			needed_priv = PRIV_FILE_DOWNGRADE_SL;
578			goto out_check;
579		}
580
581		if (hexstr_to_label(strval, &new_sl) != 0)
582			return (SET_ERROR(EPERM));
583
584		if (blstrictdom(&ds_sl, &new_sl))
585			needed_priv = PRIV_FILE_DOWNGRADE_SL;
586		else if (blstrictdom(&new_sl, &ds_sl))
587			needed_priv = PRIV_FILE_UPGRADE_SL;
588	} else {
589		/* dataset currently has a default label */
590		if (!new_default)
591			needed_priv = PRIV_FILE_UPGRADE_SL;
592	}
593
594out_check:
595	if (needed_priv != -1)
596		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
597	return (0);
598#else
599	return (SET_ERROR(ENOTSUP));
600#endif /* HAVE_MLSLABEL */
601}
602
603static int
604zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
605    cred_t *cr)
606{
607	const char *strval;
608
609	/*
610	 * Check permissions for special properties.
611	 */
612	switch (prop) {
613	default:
614		break;
615	case ZFS_PROP_ZONED:
616		/*
617		 * Disallow setting of 'zoned' from within a local zone.
618		 */
619		if (!INGLOBALZONE(curproc))
620			return (SET_ERROR(EPERM));
621		break;
622
623	case ZFS_PROP_QUOTA:
624	case ZFS_PROP_FILESYSTEM_LIMIT:
625	case ZFS_PROP_SNAPSHOT_LIMIT:
626		if (!INGLOBALZONE(curproc)) {
627			uint64_t zoned;
628			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
629			/*
630			 * Unprivileged users are allowed to modify the
631			 * limit on things *under* (ie. contained by)
632			 * the thing they own.
633			 */
634			if (dsl_prop_get_integer(dsname,
635			    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
636				return (SET_ERROR(EPERM));
637			if (!zoned || strlen(dsname) <= strlen(setpoint))
638				return (SET_ERROR(EPERM));
639		}
640		break;
641
642	case ZFS_PROP_MLSLABEL:
643		if (!is_system_labeled())
644			return (SET_ERROR(EPERM));
645
646		if (nvpair_value_string(propval, &strval) == 0) {
647			int err;
648
649			err = zfs_set_slabel_policy(dsname, strval, CRED());
650			if (err != 0)
651				return (err);
652		}
653		break;
654	}
655
656	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
657}
658
659static int
660zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
661{
662	/*
663	 * permission to set permissions will be evaluated later in
664	 * dsl_deleg_can_allow()
665	 */
666	(void) innvl;
667	return (zfs_dozonecheck(zc->zc_name, cr));
668}
669
670static int
671zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
672{
673	(void) innvl;
674	return (zfs_secpolicy_write_perms(zc->zc_name,
675	    ZFS_DELEG_PERM_ROLLBACK, cr));
676}
677
678static int
679zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
680{
681	(void) innvl;
682	dsl_pool_t *dp;
683	dsl_dataset_t *ds;
684	const char *cp;
685	int error;
686
687	/*
688	 * Generate the current snapshot name from the given objsetid, then
689	 * use that name for the secpolicy/zone checks.
690	 */
691	cp = strchr(zc->zc_name, '@');
692	if (cp == NULL)
693		return (SET_ERROR(EINVAL));
694	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
695	if (error != 0)
696		return (error);
697
698	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
699	if (error != 0) {
700		dsl_pool_rele(dp, FTAG);
701		return (error);
702	}
703
704	dsl_dataset_name(ds, zc->zc_name);
705
706	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
707	    ZFS_DELEG_PERM_SEND, cr);
708	dsl_dataset_rele(ds, FTAG);
709	dsl_pool_rele(dp, FTAG);
710
711	return (error);
712}
713
714static int
715zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
716{
717	(void) innvl;
718	return (zfs_secpolicy_write_perms(zc->zc_name,
719	    ZFS_DELEG_PERM_SEND, cr));
720}
721
722static int
723zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
724{
725	(void) zc, (void) innvl, (void) cr;
726	return (SET_ERROR(ENOTSUP));
727}
728
729static int
730zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
731{
732	(void) zc, (void) innvl, (void) cr;
733	return (SET_ERROR(ENOTSUP));
734}
735
/*
 * Compute the parent of 'datasetname' into 'parent' (a buffer of
 * 'parentsize' bytes) by cutting the name at its last '@', or, when there
 * is no '@', at its last '/'.
 *
 * Returns 0 on success, or ENOENT when the name contains neither
 * separator; in that case 'parent' holds an unmodified copy of the name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *sep;

	(void) strlcpy(parent, datasetname, parentsize);

	/* Strip the "@bla" component if present, else the "/bla" one. */
	sep = strrchr(parent, '@');
	if (sep == NULL)
		sep = strrchr(parent, '/');
	if (sep == NULL)
		return (SET_ERROR(ENOENT));

	*sep = '\0';
	return (0);
}
757
758int
759zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
760{
761	int error;
762
763	if ((error = zfs_secpolicy_write_perms(name,
764	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
765		return (error);
766
767	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
768}
769
770static int
771zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
772{
773	(void) innvl;
774	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
775}
776
777/*
778 * Destroying snapshots with delegated permissions requires
779 * descendant mount and destroy permissions.
780 */
781static int
782zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
783{
784	(void) zc;
785	nvlist_t *snaps;
786	nvpair_t *pair, *nextpair;
787	int error = 0;
788
789	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
790
791	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
792	    pair = nextpair) {
793		nextpair = nvlist_next_nvpair(snaps, pair);
794		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
795		if (error == ENOENT) {
796			/*
797			 * Ignore any snapshots that don't exist (we consider
798			 * them "already destroyed").  Remove the name from the
799			 * nvl here in case the snapshot is created between
800			 * now and when we try to destroy it (in which case
801			 * we don't want to destroy it since we haven't
802			 * checked for permission).
803			 */
804			fnvlist_remove_nvpair(snaps, pair);
805			error = 0;
806		}
807		if (error != 0)
808			break;
809	}
810
811	return (error);
812}
813
814int
815zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
816{
817	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
818	int	error;
819
820	if ((error = zfs_secpolicy_write_perms(from,
821	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
822		return (error);
823
824	if ((error = zfs_secpolicy_write_perms(from,
825	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
826		return (error);
827
828	if ((error = zfs_get_parent(to, parentname,
829	    sizeof (parentname))) != 0)
830		return (error);
831
832	if ((error = zfs_secpolicy_write_perms(parentname,
833	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
834		return (error);
835
836	if ((error = zfs_secpolicy_write_perms(parentname,
837	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
838		return (error);
839
840	return (error);
841}
842
843static int
844zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
845{
846	(void) innvl;
847	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
848}
849
850static int
851zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
852{
853	(void) innvl;
854	dsl_pool_t *dp;
855	dsl_dataset_t *clone;
856	int error;
857
858	error = zfs_secpolicy_write_perms(zc->zc_name,
859	    ZFS_DELEG_PERM_PROMOTE, cr);
860	if (error != 0)
861		return (error);
862
863	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
864	if (error != 0)
865		return (error);
866
867	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
868
869	if (error == 0) {
870		char parentname[ZFS_MAX_DATASET_NAME_LEN];
871		dsl_dataset_t *origin = NULL;
872		dsl_dir_t *dd;
873		dd = clone->ds_dir;
874
875		error = dsl_dataset_hold_obj(dd->dd_pool,
876		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
877		if (error != 0) {
878			dsl_dataset_rele(clone, FTAG);
879			dsl_pool_rele(dp, FTAG);
880			return (error);
881		}
882
883		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
884		    ZFS_DELEG_PERM_MOUNT, cr);
885
886		dsl_dataset_name(origin, parentname);
887		if (error == 0) {
888			error = zfs_secpolicy_write_perms_ds(parentname, origin,
889			    ZFS_DELEG_PERM_PROMOTE, cr);
890		}
891		dsl_dataset_rele(clone, FTAG);
892		dsl_dataset_rele(origin, FTAG);
893	}
894	dsl_pool_rele(dp, FTAG);
895	return (error);
896}
897
898static int
899zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
900{
901	(void) innvl;
902	int error;
903
904	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
905	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
906		return (error);
907
908	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
909	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
910		return (error);
911
912	return (zfs_secpolicy_write_perms(zc->zc_name,
913	    ZFS_DELEG_PERM_CREATE, cr));
914}
915
916int
917zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
918{
919	return (zfs_secpolicy_write_perms(name,
920	    ZFS_DELEG_PERM_SNAPSHOT, cr));
921}
922
923/*
924 * Check for permission to create each snapshot in the nvlist.
925 */
926static int
927zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
928{
929	(void) zc;
930	nvlist_t *snaps;
931	int error = 0;
932	nvpair_t *pair;
933
934	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
935
936	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
937	    pair = nvlist_next_nvpair(snaps, pair)) {
938		char *name = (char *)nvpair_name(pair);
939		char *atp = strchr(name, '@');
940
941		if (atp == NULL) {
942			error = SET_ERROR(EINVAL);
943			break;
944		}
945		*atp = '\0';
946		error = zfs_secpolicy_snapshot_perms(name, cr);
947		*atp = '@';
948		if (error != 0)
949			break;
950	}
951	return (error);
952}
953
954/*
955 * Check for permission to create each bookmark in the nvlist.
956 */
957static int
958zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
959{
960	(void) zc;
961	int error = 0;
962
963	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
964	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
965		char *name = (char *)nvpair_name(pair);
966		char *hashp = strchr(name, '#');
967
968		if (hashp == NULL) {
969			error = SET_ERROR(EINVAL);
970			break;
971		}
972		*hashp = '\0';
973		error = zfs_secpolicy_write_perms(name,
974		    ZFS_DELEG_PERM_BOOKMARK, cr);
975		*hashp = '#';
976		if (error != 0)
977			break;
978	}
979	return (error);
980}
981
982static int
983zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
984{
985	(void) zc;
986	nvpair_t *pair, *nextpair;
987	int error = 0;
988
989	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
990	    pair = nextpair) {
991		char *name = (char *)nvpair_name(pair);
992		char *hashp = strchr(name, '#');
993		nextpair = nvlist_next_nvpair(innvl, pair);
994
995		if (hashp == NULL) {
996			error = SET_ERROR(EINVAL);
997			break;
998		}
999
1000		*hashp = '\0';
1001		error = zfs_secpolicy_write_perms(name,
1002		    ZFS_DELEG_PERM_DESTROY, cr);
1003		*hashp = '#';
1004		if (error == ENOENT) {
1005			/*
1006			 * Ignore any filesystems that don't exist (we consider
1007			 * their bookmarks "already destroyed").  Remove
1008			 * the name from the nvl here in case the filesystem
1009			 * is created between now and when we try to destroy
1010			 * the bookmark (in which case we don't want to
1011			 * destroy it since we haven't checked for permission).
1012			 */
1013			fnvlist_remove_nvpair(innvl, pair);
1014			error = 0;
1015		}
1016		if (error != 0)
1017			break;
1018	}
1019
1020	return (error);
1021}
1022
1023static int
1024zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1025{
1026	(void) zc, (void) innvl, (void) cr;
1027	/*
1028	 * Even root must have a proper TSD so that we know what pool
1029	 * to log to.
1030	 */
1031	if (tsd_get(zfs_allow_log_key) == NULL)
1032		return (SET_ERROR(EPERM));
1033	return (0);
1034}
1035
1036static int
1037zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1038{
1039	char		parentname[ZFS_MAX_DATASET_NAME_LEN];
1040	int		error;
1041	const char	*origin;
1042
1043	if ((error = zfs_get_parent(zc->zc_name, parentname,
1044	    sizeof (parentname))) != 0)
1045		return (error);
1046
1047	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1048	    (error = zfs_secpolicy_write_perms(origin,
1049	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1050		return (error);
1051
1052	if ((error = zfs_secpolicy_write_perms(parentname,
1053	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1054		return (error);
1055
1056	return (zfs_secpolicy_write_perms(parentname,
1057	    ZFS_DELEG_PERM_MOUNT, cr));
1058}
1059
1060/*
1061 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1062 * SYS_CONFIG privilege, which is not available in a local zone.
1063 */
1064int
1065zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1066{
1067	(void) zc, (void) innvl;
1068
1069	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1070		return (SET_ERROR(EPERM));
1071
1072	return (0);
1073}
1074
1075/*
1076 * Policy for object to name lookups.
1077 */
1078static int
1079zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1080{
1081	(void) innvl;
1082	int error;
1083
1084	if (secpolicy_sys_config(cr, B_FALSE) == 0)
1085		return (0);
1086
1087	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1088	return (error);
1089}
1090
1091/*
1092 * Policy for fault injection.  Requires all privileges.
1093 */
1094static int
1095zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1096{
1097	(void) zc, (void) innvl;
1098	return (secpolicy_zinject(cr));
1099}
1100
1101static int
1102zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1103{
1104	(void) innvl;
1105	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1106
1107	if (prop == ZPROP_USERPROP) {
1108		if (!zfs_prop_user(zc->zc_value))
1109			return (SET_ERROR(EINVAL));
1110		return (zfs_secpolicy_write_perms(zc->zc_name,
1111		    ZFS_DELEG_PERM_USERPROP, cr));
1112	} else {
1113		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1114		    NULL, cr));
1115	}
1116}
1117
1118static int
1119zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1120{
1121	int err = zfs_secpolicy_read(zc, innvl, cr);
1122	if (err)
1123		return (err);
1124
1125	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1126		return (SET_ERROR(EINVAL));
1127
1128	if (zc->zc_value[0] == 0) {
1129		/*
1130		 * They are asking about a posix uid/gid.  If it's
1131		 * themself, allow it.
1132		 */
1133		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1134		    zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1135		    zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1136		    zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1137			if (zc->zc_guid == crgetuid(cr))
1138				return (0);
1139		} else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1140		    zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1141		    zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1142		    zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1143			if (groupmember(zc->zc_guid, cr))
1144				return (0);
1145		}
1146		/* else is for project quota/used */
1147	}
1148
1149	return (zfs_secpolicy_write_perms(zc->zc_name,
1150	    userquota_perms[zc->zc_objset_type], cr));
1151}
1152
1153static int
1154zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1155{
1156	int err = zfs_secpolicy_read(zc, innvl, cr);
1157	if (err)
1158		return (err);
1159
1160	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1161		return (SET_ERROR(EINVAL));
1162
1163	return (zfs_secpolicy_write_perms(zc->zc_name,
1164	    userquota_perms[zc->zc_objset_type], cr));
1165}
1166
1167static int
1168zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1169{
1170	(void) innvl;
1171	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1172	    NULL, cr));
1173}
1174
1175static int
1176zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1177{
1178	(void) zc;
1179	nvpair_t *pair;
1180	nvlist_t *holds;
1181	int error;
1182
1183	holds = fnvlist_lookup_nvlist(innvl, "holds");
1184
1185	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1186	    pair = nvlist_next_nvpair(holds, pair)) {
1187		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1188		error = dmu_fsname(nvpair_name(pair), fsname);
1189		if (error != 0)
1190			return (error);
1191		error = zfs_secpolicy_write_perms(fsname,
1192		    ZFS_DELEG_PERM_HOLD, cr);
1193		if (error != 0)
1194			return (error);
1195	}
1196	return (0);
1197}
1198
1199static int
1200zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1201{
1202	(void) zc;
1203	nvpair_t *pair;
1204	int error;
1205
1206	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1207	    pair = nvlist_next_nvpair(innvl, pair)) {
1208		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1209		error = dmu_fsname(nvpair_name(pair), fsname);
1210		if (error != 0)
1211			return (error);
1212		error = zfs_secpolicy_write_perms(fsname,
1213		    ZFS_DELEG_PERM_RELEASE, cr);
1214		if (error != 0)
1215			return (error);
1216	}
1217	return (0);
1218}
1219
1220/*
1221 * Policy for allowing temporary snapshots to be taken or released
1222 */
1223static int
1224zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1225{
1226	/*
1227	 * A temporary snapshot is the same as a snapshot,
1228	 * hold, destroy and release all rolled into one.
1229	 * Delegated diff alone is sufficient that we allow this.
1230	 */
1231	int error;
1232
1233	if (zfs_secpolicy_write_perms(zc->zc_name,
1234	    ZFS_DELEG_PERM_DIFF, cr) == 0)
1235		return (0);
1236
1237	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1238
1239	if (innvl != NULL) {
1240		if (error == 0)
1241			error = zfs_secpolicy_hold(zc, innvl, cr);
1242		if (error == 0)
1243			error = zfs_secpolicy_release(zc, innvl, cr);
1244		if (error == 0)
1245			error = zfs_secpolicy_destroy(zc, innvl, cr);
1246	}
1247	return (error);
1248}
1249
1250static int
1251zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1252{
1253	return (zfs_secpolicy_write_perms(zc->zc_name,
1254	    ZFS_DELEG_PERM_LOAD_KEY, cr));
1255}
1256
1257static int
1258zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1259{
1260	return (zfs_secpolicy_write_perms(zc->zc_name,
1261	    ZFS_DELEG_PERM_CHANGE_KEY, cr));
1262}
1263
1264/*
1265 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1266 */
1267static int
1268get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1269{
1270	char *packed;
1271	int error;
1272	nvlist_t *list = NULL;
1273
1274	/*
1275	 * Read in and unpack the user-supplied nvlist.
1276	 */
1277	if (size == 0)
1278		return (SET_ERROR(EINVAL));
1279
1280	packed = vmem_alloc(size, KM_SLEEP);
1281
1282	if (ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag) != 0) {
1283		vmem_free(packed, size);
1284		return (SET_ERROR(EFAULT));
1285	}
1286
1287	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1288		vmem_free(packed, size);
1289		return (error);
1290	}
1291
1292	vmem_free(packed, size);
1293
1294	*nvp = list;
1295	return (0);
1296}
1297
1298/*
1299 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1300 * Entries will be removed from the end of the nvlist, and one int32 entry
1301 * named "N_MORE_ERRORS" will be added indicating how many entries were
1302 * removed.
1303 */
1304static int
1305nvlist_smush(nvlist_t *errors, size_t max)
1306{
1307	size_t size;
1308
1309	size = fnvlist_size(errors);
1310
1311	if (size > max) {
1312		nvpair_t *more_errors;
1313		int n = 0;
1314
1315		if (max < 1024)
1316			return (SET_ERROR(ENOMEM));
1317
1318		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1319		more_errors = nvlist_prev_nvpair(errors, NULL);
1320
1321		do {
1322			nvpair_t *pair = nvlist_prev_nvpair(errors,
1323			    more_errors);
1324			fnvlist_remove_nvpair(errors, pair);
1325			n++;
1326			size = fnvlist_size(errors);
1327		} while (size > max);
1328
1329		fnvlist_remove_nvpair(errors, more_errors);
1330		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1331		ASSERT3U(fnvlist_size(errors), <=, max);
1332	}
1333
1334	return (0);
1335}
1336
1337static int
1338put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1339{
1340	char *packed = NULL;
1341	int error = 0;
1342	size_t size;
1343
1344	size = fnvlist_size(nvl);
1345
1346	if (size > zc->zc_nvlist_dst_size) {
1347		error = SET_ERROR(ENOMEM);
1348	} else {
1349		packed = fnvlist_pack(nvl, &size);
1350		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1351		    size, zc->zc_iflags) != 0)
1352			error = SET_ERROR(EFAULT);
1353		fnvlist_pack_free(packed, size);
1354	}
1355
1356	zc->zc_nvlist_dst_size = size;
1357	zc->zc_nvlist_dst_filled = B_TRUE;
1358	return (error);
1359}
1360
1361int
1362getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1363{
1364	int error = 0;
1365	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1366		return (SET_ERROR(EINVAL));
1367	}
1368
1369	mutex_enter(&os->os_user_ptr_lock);
1370	*zfvp = dmu_objset_get_user(os);
1371	/* bump s_active only when non-zero to prevent umount race */
1372	error = zfs_vfs_ref(zfvp);
1373	mutex_exit(&os->os_user_ptr_lock);
1374	return (error);
1375}
1376
1377int
1378getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1379{
1380	objset_t *os;
1381	int error;
1382
1383	error = dmu_objset_hold(dsname, FTAG, &os);
1384	if (error != 0)
1385		return (error);
1386
1387	error = getzfsvfs_impl(os, zfvp);
1388	dmu_objset_rele(os, FTAG);
1389	return (error);
1390}
1391
1392/*
1393 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1394 * case its z_sb will be NULL, and it will be opened as the owner.
1395 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1396 * which prevents all inode ops from running.
1397 */
1398static int
1399zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp,
1400    boolean_t writer)
1401{
1402	int error = 0;
1403
1404	if (getzfsvfs(name, zfvp) != 0)
1405		error = zfsvfs_create(name, B_FALSE, zfvp);
1406	if (error == 0) {
1407		if (writer)
1408			ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
1409		else
1410			ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
1411		if ((*zfvp)->z_unmounted) {
1412			/*
1413			 * XXX we could probably try again, since the unmounting
1414			 * thread should be just about to disassociate the
1415			 * objset from the zfsvfs.
1416			 */
1417			ZFS_TEARDOWN_EXIT(*zfvp, tag);
1418			return (SET_ERROR(EBUSY));
1419		}
1420	}
1421	return (error);
1422}
1423
1424static void
1425zfsvfs_rele(zfsvfs_t *zfsvfs, const void *tag)
1426{
1427	ZFS_TEARDOWN_EXIT(zfsvfs, tag);
1428
1429	if (zfs_vfs_held(zfsvfs)) {
1430		zfs_vfs_rele(zfsvfs);
1431	} else {
1432		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1433		zfsvfs_free(zfsvfs);
1434	}
1435}
1436
/*
 * Create a new pool.
 *
 * inputs:
 * zc_name		name passed to spa_create()
 * zc_nvlist_conf	packed config nvlist (required)
 * zc_nvlist_src	packed pool property nvlist (read only when
 *			zc_nvlist_src_size is non-zero)
 *
 * Returns 0 on success or an errno.  All nvlists obtained or built here
 * are freed before returning, on every path.
 */
static int
zfs_ioc_pool_create(zfs_cmd_t *zc)
{
	int error;
	nvlist_t *config, *props = NULL;
	nvlist_t *rootprops = NULL;
	nvlist_t *zplprops = NULL;
	dsl_crypto_params_t *dcp = NULL;
	const char *spa_name = zc->zc_name;
	boolean_t unload_wkey = B_TRUE;

	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
	    zc->zc_iflags, &config)))
		return (error);

	if (zc->zc_nvlist_src_size != 0 && (error =
	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props))) {
		nvlist_free(config);
		return (error);
	}

	if (props) {
		nvlist_t *nvl = NULL;
		nvlist_t *hidden_args = NULL;
		uint64_t version = SPA_VERSION;
		const char *tname;

		/* A missing version property leaves the SPA_VERSION default. */
		(void) nvlist_lookup_uint64(props,
		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
		if (!SPA_VERSION_IS_SUPPORTED(version)) {
			error = SET_ERROR(EINVAL);
			goto pool_props_bad;
		}
		/*
		 * Move the root filesystem properties, if any, out of
		 * 'props' into a private copy.
		 */
		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
		if (nvl) {
			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
			if (error != 0)
				goto pool_props_bad;
			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
		}

		/*
		 * Build the crypto parameters from the root properties and
		 * the hidden arguments, then strip the hidden arguments
		 * from 'props'.
		 */
		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
		    &hidden_args);
		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
		    rootprops, hidden_args, &dcp);
		if (error != 0)
			goto pool_props_bad;
		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);

		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		error = zfs_fill_zplprops_root(version, rootprops,
		    zplprops, NULL);
		if (error != 0)
			goto pool_props_bad;

		/*
		 * When a ZPOOL_PROP_TNAME property is supplied, use it as
		 * the name for the follow-up property set (and for the
		 * destroy on failure) below.
		 */
		if (nvlist_lookup_string(props,
		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
			spa_name = tname;
	}

	error = spa_create(zc->zc_name, config, props, zplprops, dcp);

	/*
	 * Set the remaining root properties.  If that fails the freshly
	 * created pool is destroyed again.
	 */
	if (!error && (error = zfs_set_prop_nvlist(spa_name,
	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
		(void) spa_destroy(spa_name);
		unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
	}

pool_props_bad:
	/* Common exit: also reached on success by falling through. */
	nvlist_free(rootprops);
	nvlist_free(zplprops);
	nvlist_free(config);
	nvlist_free(props);
	/*
	 * The second argument is true only when an error occurred and
	 * spa_destroy() was not called above.
	 * NOTE(review): presumably it requests unloading the wrapping key.
	 */
	dsl_crypto_params_free(dcp, unload_wkey && !!error);

	return (error);
}
1518
1519static int
1520zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1521{
1522	int error;
1523	zfs_log_history(zc);
1524	error = spa_destroy(zc->zc_name);
1525
1526	return (error);
1527}
1528
1529static int
1530zfs_ioc_pool_import(zfs_cmd_t *zc)
1531{
1532	nvlist_t *config, *props = NULL;
1533	uint64_t guid;
1534	int error;
1535
1536	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1537	    zc->zc_iflags, &config)) != 0)
1538		return (error);
1539
1540	if (zc->zc_nvlist_src_size != 0 && (error =
1541	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1542	    zc->zc_iflags, &props))) {
1543		nvlist_free(config);
1544		return (error);
1545	}
1546
1547	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1548	    guid != zc->zc_guid)
1549		error = SET_ERROR(EINVAL);
1550	else
1551		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1552
1553	if (zc->zc_nvlist_dst != 0) {
1554		int err;
1555
1556		if ((err = put_nvlist(zc, config)) != 0)
1557			error = err;
1558	}
1559
1560	nvlist_free(config);
1561	nvlist_free(props);
1562
1563	return (error);
1564}
1565
1566static int
1567zfs_ioc_pool_export(zfs_cmd_t *zc)
1568{
1569	int error;
1570	boolean_t force = (boolean_t)zc->zc_cookie;
1571	boolean_t hardforce = (boolean_t)zc->zc_guid;
1572
1573	zfs_log_history(zc);
1574	error = spa_export(zc->zc_name, NULL, force, hardforce);
1575
1576	return (error);
1577}
1578
1579static int
1580zfs_ioc_pool_configs(zfs_cmd_t *zc)
1581{
1582	nvlist_t *configs;
1583	int error;
1584
1585	error = spa_all_configs(&zc->zc_cookie, &configs);
1586	if (error)
1587		return (error);
1588
1589	error = put_nvlist(zc, configs);
1590
1591	nvlist_free(configs);
1592
1593	return (error);
1594}
1595
1596/*
1597 * inputs:
1598 * zc_name		name of the pool
1599 *
1600 * outputs:
1601 * zc_cookie		real errno
1602 * zc_nvlist_dst	config nvlist
1603 * zc_nvlist_dst_size	size of config nvlist
1604 */
1605static int
1606zfs_ioc_pool_stats(zfs_cmd_t *zc)
1607{
1608	nvlist_t *config;
1609	int error;
1610	int ret = 0;
1611
1612	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1613	    sizeof (zc->zc_value));
1614
1615	if (config != NULL) {
1616		ret = put_nvlist(zc, config);
1617		nvlist_free(config);
1618
1619		/*
1620		 * The config may be present even if 'error' is non-zero.
1621		 * In this case we return success, and preserve the real errno
1622		 * in 'zc_cookie'.
1623		 */
1624		zc->zc_cookie = error;
1625	} else {
1626		ret = error;
1627	}
1628
1629	return (ret);
1630}
1631
1632/*
1633 * Try to import the given pool, returning pool stats as appropriate so that
1634 * user land knows which devices are available and overall pool health.
1635 */
1636static int
1637zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1638{
1639	nvlist_t *tryconfig, *config = NULL;
1640	int error;
1641
1642	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1643	    zc->zc_iflags, &tryconfig)) != 0)
1644		return (error);
1645
1646	config = spa_tryimport(tryconfig);
1647
1648	nvlist_free(tryconfig);
1649
1650	if (config == NULL)
1651		return (SET_ERROR(EINVAL));
1652
1653	error = put_nvlist(zc, config);
1654	nvlist_free(config);
1655
1656	return (error);
1657}
1658
1659/*
1660 * inputs:
1661 * zc_name              name of the pool
1662 * zc_cookie            scan func (pool_scan_func_t)
1663 * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1664 */
1665static int
1666zfs_ioc_pool_scan(zfs_cmd_t *zc)
1667{
1668	spa_t *spa;
1669	int error;
1670
1671	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1672		return (SET_ERROR(EINVAL));
1673
1674	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1675		return (error);
1676
1677	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1678		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1679	else if (zc->zc_cookie == POOL_SCAN_NONE)
1680		error = spa_scan_stop(spa);
1681	else
1682		error = spa_scan(spa, zc->zc_cookie);
1683
1684	spa_close(spa, FTAG);
1685
1686	return (error);
1687}
1688
1689/*
1690 * inputs:
1691 * poolname             name of the pool
1692 * scan_type            scan func (pool_scan_func_t)
1693 * scan_command         scrub pause/resume flag (pool_scrub_cmd_t)
1694 */
1695static const zfs_ioc_key_t zfs_keys_pool_scrub[] = {
1696	{"scan_type",		DATA_TYPE_UINT64,	0},
1697	{"scan_command",	DATA_TYPE_UINT64,	0},
1698};
1699
1700static int
1701zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
1702{
1703	spa_t *spa;
1704	int error;
1705	uint64_t scan_type, scan_cmd;
1706
1707	if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0)
1708		return (SET_ERROR(EINVAL));
1709	if (nvlist_lookup_uint64(innvl, "scan_command", &scan_cmd) != 0)
1710		return (SET_ERROR(EINVAL));
1711
1712	if (scan_cmd >= POOL_SCRUB_FLAGS_END)
1713		return (SET_ERROR(EINVAL));
1714
1715	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
1716		return (error);
1717
1718	if (scan_cmd == POOL_SCRUB_PAUSE) {
1719		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1720	} else if (scan_type == POOL_SCAN_NONE) {
1721		error = spa_scan_stop(spa);
1722	} else {
1723		error = spa_scan(spa, scan_type);
1724	}
1725
1726	spa_close(spa, FTAG);
1727	return (error);
1728}
1729
1730static int
1731zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1732{
1733	spa_t *spa;
1734	int error;
1735
1736	error = spa_open(zc->zc_name, &spa, FTAG);
1737	if (error == 0) {
1738		spa_freeze(spa);
1739		spa_close(spa, FTAG);
1740	}
1741	return (error);
1742}
1743
1744static int
1745zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1746{
1747	spa_t *spa;
1748	int error;
1749
1750	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1751		return (error);
1752
1753	if (zc->zc_cookie < spa_version(spa) ||
1754	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1755		spa_close(spa, FTAG);
1756		return (SET_ERROR(EINVAL));
1757	}
1758
1759	spa_upgrade(spa, zc->zc_cookie);
1760	spa_close(spa, FTAG);
1761
1762	return (error);
1763}
1764
1765static int
1766zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1767{
1768	spa_t *spa;
1769	char *hist_buf;
1770	uint64_t size;
1771	int error;
1772
1773	if ((size = zc->zc_history_len) == 0)
1774		return (SET_ERROR(EINVAL));
1775
1776	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1777		return (error);
1778
1779	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1780		spa_close(spa, FTAG);
1781		return (SET_ERROR(ENOTSUP));
1782	}
1783
1784	hist_buf = vmem_alloc(size, KM_SLEEP);
1785	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1786	    &zc->zc_history_len, hist_buf)) == 0) {
1787		error = ddi_copyout(hist_buf,
1788		    (void *)(uintptr_t)zc->zc_history,
1789		    zc->zc_history_len, zc->zc_iflags);
1790	}
1791
1792	spa_close(spa, FTAG);
1793	vmem_free(hist_buf, size);
1794	return (error);
1795}
1796
1797static int
1798zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1799{
1800	spa_t *spa;
1801	int error;
1802
1803	error = spa_open(zc->zc_name, &spa, FTAG);
1804	if (error == 0) {
1805		error = spa_change_guid(spa);
1806		spa_close(spa, FTAG);
1807	}
1808	return (error);
1809}
1810
1811static int
1812zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1813{
1814	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1815}
1816
1817/*
1818 * inputs:
1819 * zc_name		name of filesystem
1820 * zc_obj		object to find
1821 *
1822 * outputs:
1823 * zc_value		name of object
1824 */
1825static int
1826zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1827{
1828	objset_t *os;
1829	int error;
1830
1831	/* XXX reading from objset not owned */
1832	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1833	    FTAG, &os)) != 0)
1834		return (error);
1835	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1836		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1837		return (SET_ERROR(EINVAL));
1838	}
1839	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1840	    sizeof (zc->zc_value));
1841	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1842
1843	return (error);
1844}
1845
1846/*
1847 * inputs:
1848 * zc_name		name of filesystem
1849 * zc_obj		object to find
1850 *
1851 * outputs:
1852 * zc_stat		stats on object
1853 * zc_value		path to object
1854 */
1855static int
1856zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1857{
1858	objset_t *os;
1859	int error;
1860
1861	/* XXX reading from objset not owned */
1862	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1863	    FTAG, &os)) != 0)
1864		return (error);
1865	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1866		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1867		return (SET_ERROR(EINVAL));
1868	}
1869	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1870	    sizeof (zc->zc_value));
1871	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1872
1873	return (error);
1874}
1875
1876static int
1877zfs_ioc_vdev_add(zfs_cmd_t *zc)
1878{
1879	spa_t *spa;
1880	int error;
1881	nvlist_t *config;
1882
1883	error = spa_open(zc->zc_name, &spa, FTAG);
1884	if (error != 0)
1885		return (error);
1886
1887	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1888	    zc->zc_iflags, &config);
1889	if (error == 0) {
1890		error = spa_vdev_add(spa, config, zc->zc_flags);
1891		nvlist_free(config);
1892	}
1893	spa_close(spa, FTAG);
1894	return (error);
1895}
1896
1897/*
1898 * inputs:
1899 * zc_name		name of the pool
1900 * zc_guid		guid of vdev to remove
1901 * zc_cookie		cancel removal
1902 */
1903static int
1904zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1905{
1906	spa_t *spa;
1907	int error;
1908
1909	error = spa_open(zc->zc_name, &spa, FTAG);
1910	if (error != 0)
1911		return (error);
1912	if (zc->zc_cookie != 0) {
1913		error = spa_vdev_remove_cancel(spa);
1914	} else {
1915		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1916	}
1917	spa_close(spa, FTAG);
1918	return (error);
1919}
1920
1921static int
1922zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1923{
1924	spa_t *spa;
1925	int error;
1926	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1927
1928	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1929		return (error);
1930	switch (zc->zc_cookie) {
1931	case VDEV_STATE_ONLINE:
1932		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1933		break;
1934
1935	case VDEV_STATE_OFFLINE:
1936		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1937		break;
1938
1939	case VDEV_STATE_FAULTED:
1940		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1941		    zc->zc_obj != VDEV_AUX_EXTERNAL &&
1942		    zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
1943			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1944
1945		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1946		break;
1947
1948	case VDEV_STATE_DEGRADED:
1949		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1950		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1951			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1952
1953		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1954		break;
1955
1956	case VDEV_STATE_REMOVED:
1957		error = vdev_remove_wanted(spa, zc->zc_guid);
1958		break;
1959
1960	default:
1961		error = SET_ERROR(EINVAL);
1962	}
1963	zc->zc_cookie = newstate;
1964	spa_close(spa, FTAG);
1965	return (error);
1966}
1967
1968static int
1969zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1970{
1971	spa_t *spa;
1972	nvlist_t *config;
1973	int replacing = zc->zc_cookie;
1974	int rebuild = zc->zc_simple;
1975	int error;
1976
1977	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1978		return (error);
1979
1980	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1981	    zc->zc_iflags, &config)) == 0) {
1982		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
1983		    rebuild);
1984		nvlist_free(config);
1985	}
1986
1987	spa_close(spa, FTAG);
1988	return (error);
1989}
1990
1991static int
1992zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1993{
1994	spa_t *spa;
1995	int error;
1996
1997	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1998		return (error);
1999
2000	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2001
2002	spa_close(spa, FTAG);
2003	return (error);
2004}
2005
2006static int
2007zfs_ioc_vdev_split(zfs_cmd_t *zc)
2008{
2009	spa_t *spa;
2010	nvlist_t *config, *props = NULL;
2011	int error;
2012	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2013
2014	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2015		return (error);
2016
2017	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2018	    zc->zc_iflags, &config))) {
2019		spa_close(spa, FTAG);
2020		return (error);
2021	}
2022
2023	if (zc->zc_nvlist_src_size != 0 && (error =
2024	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2025	    zc->zc_iflags, &props))) {
2026		spa_close(spa, FTAG);
2027		nvlist_free(config);
2028		return (error);
2029	}
2030
2031	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2032
2033	spa_close(spa, FTAG);
2034
2035	nvlist_free(config);
2036	nvlist_free(props);
2037
2038	return (error);
2039}
2040
2041static int
2042zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2043{
2044	spa_t *spa;
2045	const char *path = zc->zc_value;
2046	uint64_t guid = zc->zc_guid;
2047	int error;
2048
2049	error = spa_open(zc->zc_name, &spa, FTAG);
2050	if (error != 0)
2051		return (error);
2052
2053	error = spa_vdev_setpath(spa, guid, path);
2054	spa_close(spa, FTAG);
2055	return (error);
2056}
2057
2058static int
2059zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2060{
2061	spa_t *spa;
2062	const char *fru = zc->zc_value;
2063	uint64_t guid = zc->zc_guid;
2064	int error;
2065
2066	error = spa_open(zc->zc_name, &spa, FTAG);
2067	if (error != 0)
2068		return (error);
2069
2070	error = spa_vdev_setfru(spa, guid, fru);
2071	spa_close(spa, FTAG);
2072	return (error);
2073}
2074
/*
 * Fill in the stats for an already-held objset.
 *
 * zc_objset_stats is always filled via dmu_objset_fast_stat().  The
 * property nvlist is gathered and copied out to zc_nvlist_dst only when
 * zc_simple is unset and a destination was supplied.
 *
 * Returns 0, the error from dsl_prop_get_all() or put_nvlist(), or EIO
 * from zvol_get_stats().
 */
static int
zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
{
	int error = 0;
	nvlist_t *nv;

	dmu_objset_fast_stat(os, &zc->zc_objset_stats);

	/* 'nv' is only valid inside this block, after a successful get. */
	if (!zc->zc_simple && zc->zc_nvlist_dst != 0 &&
	    (error = dsl_prop_get_all(os, &nv)) == 0) {
		dmu_objset_stats(os, nv);
		/*
		 * NB: zvol_get_stats() will read the objset contents,
		 * which we aren't supposed to do with a
		 * DS_MODE_USER hold, because it could be
		 * inconsistent.  So this is a bit of a workaround...
		 * XXX reading without owning
		 */
		if (!zc->zc_objset_stats.dds_inconsistent &&
		    dmu_objset_type(os) == DMU_OST_ZVOL) {
			error = zvol_get_stats(os, nv);
			/* EIO is returned; any other failure trips VERIFY0. */
			if (error == EIO) {
				nvlist_free(nv);
				return (error);
			}
			VERIFY0(error);
		}
		if (error == 0)
			error = put_nvlist(zc, nv);
		nvlist_free(nv);
	}

	return (error);
}
2109
2110/*
2111 * inputs:
2112 * zc_name		name of filesystem
2113 * zc_nvlist_dst_size	size of buffer for property nvlist
2114 *
2115 * outputs:
2116 * zc_objset_stats	stats
2117 * zc_nvlist_dst	property nvlist
2118 * zc_nvlist_dst_size	size of property nvlist
2119 */
2120static int
2121zfs_ioc_objset_stats(zfs_cmd_t *zc)
2122{
2123	objset_t *os;
2124	int error;
2125
2126	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2127	if (error == 0) {
2128		error = zfs_ioc_objset_stats_impl(zc, os);
2129		dmu_objset_rele(os, FTAG);
2130	}
2131
2132	return (error);
2133}
2134
2135/*
2136 * inputs:
2137 * zc_name		name of filesystem
2138 * zc_nvlist_dst_size	size of buffer for property nvlist
2139 *
2140 * outputs:
2141 * zc_nvlist_dst	received property nvlist
2142 * zc_nvlist_dst_size	size of received property nvlist
2143 *
2144 * Gets received properties (distinct from local properties on or after
2145 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2146 * local property values.
2147 */
2148static int
2149zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2150{
2151	int error = 0;
2152	nvlist_t *nv;
2153
2154	/*
2155	 * Without this check, we would return local property values if the
2156	 * caller has not already received properties on or after
2157	 * SPA_VERSION_RECVD_PROPS.
2158	 */
2159	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2160		return (SET_ERROR(ENOTSUP));
2161
2162	if (zc->zc_nvlist_dst != 0 &&
2163	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2164		error = put_nvlist(zc, nv);
2165		nvlist_free(nv);
2166	}
2167
2168	return (error);
2169}
2170
2171static int
2172nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2173{
2174	uint64_t value;
2175	int error;
2176
2177	/*
2178	 * zfs_get_zplprop() will either find a value or give us
2179	 * the default value (if there is one).
2180	 */
2181	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2182		return (error);
2183	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2184	return (0);
2185}
2186
2187/*
2188 * inputs:
2189 * zc_name		name of filesystem
2190 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2191 *
2192 * outputs:
2193 * zc_nvlist_dst	zpl property nvlist
2194 * zc_nvlist_dst_size	size of zpl property nvlist
2195 */
2196static int
2197zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2198{
2199	objset_t *os;
2200	int err;
2201
2202	/* XXX reading without owning */
2203	if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2204		return (err);
2205
2206	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2207
2208	/*
2209	 * NB: nvl_add_zplprop() will read the objset contents,
2210	 * which we aren't supposed to do with a DS_MODE_USER
2211	 * hold, because it could be inconsistent.
2212	 */
2213	if (zc->zc_nvlist_dst != 0 &&
2214	    !zc->zc_objset_stats.dds_inconsistent &&
2215	    dmu_objset_type(os) == DMU_OST_ZFS) {
2216		nvlist_t *nv;
2217
2218		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2219		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2220		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2221		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2222		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2223			err = put_nvlist(zc, nv);
2224		nvlist_free(nv);
2225	} else {
2226		err = SET_ERROR(ENOENT);
2227	}
2228	dmu_objset_rele(os, FTAG);
2229	return (err);
2230}
2231
2232/*
2233 * inputs:
2234 * zc_name		name of filesystem
2235 * zc_cookie		zap cursor
2236 * zc_nvlist_dst_size	size of buffer for property nvlist
2237 *
2238 * outputs:
2239 * zc_name		name of next filesystem
2240 * zc_cookie		zap cursor
2241 * zc_objset_stats	stats
2242 * zc_nvlist_dst	property nvlist
2243 * zc_nvlist_dst_size	size of property nvlist
2244 */
2245static int
2246zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2247{
2248	objset_t *os;
2249	int error;
2250	char *p;
2251	size_t orig_len = strlen(zc->zc_name);
2252
2253top:
2254	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2255		if (error == ENOENT)
2256			error = SET_ERROR(ESRCH);
2257		return (error);
2258	}
2259
2260	p = strrchr(zc->zc_name, '/');
2261	if (p == NULL || p[1] != '\0')
2262		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2263	p = zc->zc_name + strlen(zc->zc_name);
2264
2265	do {
2266		error = dmu_dir_list_next(os,
2267		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2268		    NULL, &zc->zc_cookie);
2269		if (error == ENOENT)
2270			error = SET_ERROR(ESRCH);
2271	} while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
2272	dmu_objset_rele(os, FTAG);
2273
2274	/*
2275	 * If it's an internal dataset (ie. with a '$' in its name),
2276	 * don't try to get stats for it, otherwise we'll return ENOENT.
2277	 */
2278	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2279		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2280		if (error == ENOENT) {
2281			/* We lost a race with destroy, get the next one. */
2282			zc->zc_name[orig_len] = '\0';
2283			goto top;
2284		}
2285	}
2286	return (error);
2287}
2288
2289/*
2290 * inputs:
2291 * zc_name		name of filesystem
2292 * zc_cookie		zap cursor
2293 * zc_nvlist_src	iteration range nvlist
2294 * zc_nvlist_src_size	size of iteration range nvlist
2295 *
2296 * outputs:
2297 * zc_name		name of next snapshot
2298 * zc_objset_stats	stats
2299 * zc_nvlist_dst	property nvlist
2300 * zc_nvlist_dst_size	size of property nvlist
2301 */
2302static int
2303zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2304{
2305	int error;
2306	objset_t *os, *ossnap;
2307	dsl_dataset_t *ds;
2308	uint64_t min_txg = 0, max_txg = 0;
2309
2310	if (zc->zc_nvlist_src_size != 0) {
2311		nvlist_t *props = NULL;
2312		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2313		    zc->zc_iflags, &props);
2314		if (error != 0)
2315			return (error);
2316		(void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
2317		    &min_txg);
2318		(void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
2319		    &max_txg);
2320		nvlist_free(props);
2321	}
2322
2323	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2324	if (error != 0) {
2325		return (error == ENOENT ? SET_ERROR(ESRCH) : error);
2326	}
2327
2328	/*
2329	 * A dataset name of maximum length cannot have any snapshots,
2330	 * so exit immediately.
2331	 */
2332	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2333	    ZFS_MAX_DATASET_NAME_LEN) {
2334		dmu_objset_rele(os, FTAG);
2335		return (SET_ERROR(ESRCH));
2336	}
2337
2338	while (error == 0) {
2339		if (issig()) {
2340			error = SET_ERROR(EINTR);
2341			break;
2342		}
2343
2344		error = dmu_snapshot_list_next(os,
2345		    sizeof (zc->zc_name) - strlen(zc->zc_name),
2346		    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
2347		    &zc->zc_cookie, NULL);
2348		if (error == ENOENT) {
2349			error = SET_ERROR(ESRCH);
2350			break;
2351		} else if (error != 0) {
2352			break;
2353		}
2354
2355		error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
2356		    FTAG, &ds);
2357		if (error != 0)
2358			break;
2359
2360		if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
2361		    (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
2362			dsl_dataset_rele(ds, FTAG);
2363			/* undo snapshot name append */
2364			*(strchr(zc->zc_name, '@') + 1) = '\0';
2365			/* skip snapshot */
2366			continue;
2367		}
2368
2369		if (zc->zc_simple) {
2370			dsl_dataset_fast_stat(ds, &zc->zc_objset_stats);
2371			dsl_dataset_rele(ds, FTAG);
2372			break;
2373		}
2374
2375		if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
2376			dsl_dataset_rele(ds, FTAG);
2377			break;
2378		}
2379		if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
2380			dsl_dataset_rele(ds, FTAG);
2381			break;
2382		}
2383		dsl_dataset_rele(ds, FTAG);
2384		break;
2385	}
2386
2387	dmu_objset_rele(os, FTAG);
2388	/* if we failed, undo the @ that we tacked on to zc_name */
2389	if (error != 0)
2390		*strchr(zc->zc_name, '@') = '\0';
2391	return (error);
2392}
2393
2394static int
2395zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2396{
2397	const char *propname = nvpair_name(pair);
2398	uint64_t *valary;
2399	unsigned int vallen;
2400	const char *dash, *domain;
2401	zfs_userquota_prop_t type;
2402	uint64_t rid;
2403	uint64_t quota;
2404	zfsvfs_t *zfsvfs;
2405	int err;
2406
2407	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2408		nvlist_t *attrs;
2409		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2410		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2411		    &pair) != 0)
2412			return (SET_ERROR(EINVAL));
2413	}
2414
2415	/*
2416	 * A correctly constructed propname is encoded as
2417	 * userquota@<rid>-<domain>.
2418	 */
2419	if ((dash = strchr(propname, '-')) == NULL ||
2420	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2421	    vallen != 3)
2422		return (SET_ERROR(EINVAL));
2423
2424	domain = dash + 1;
2425	type = valary[0];
2426	rid = valary[1];
2427	quota = valary[2];
2428
2429	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2430	if (err == 0) {
2431		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2432		zfsvfs_rele(zfsvfs, FTAG);
2433	}
2434
2435	return (err);
2436}
2437
2438/*
2439 * If the named property is one that has a special function to set its value,
2440 * return 0 on success and a positive error code on failure; otherwise if it is
2441 * not one of the special properties handled by this function, return -1.
2442 *
2443 * XXX: It would be better for callers of the property interface if we handled
2444 * these special cases in dsl_prop.c (in the dsl layer).
2445 */
2446static int
2447zfs_prop_set_special(const char *dsname, zprop_source_t source,
2448    nvpair_t *pair)
2449{
2450	const char *propname = nvpair_name(pair);
2451	zfs_prop_t prop = zfs_name_to_prop(propname);
2452	uint64_t intval = 0;
2453	const char *strval = NULL;
2454	int err = -1;
2455
2456	if (prop == ZPROP_USERPROP) {
2457		if (zfs_prop_userquota(propname))
2458			return (zfs_prop_set_userquota(dsname, pair));
2459		return (-1);
2460	}
2461
2462	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2463		nvlist_t *attrs;
2464		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2465		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2466		    &pair) == 0);
2467	}
2468
2469	/* all special properties are numeric except for keylocation */
2470	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2471		strval = fnvpair_value_string(pair);
2472	} else {
2473		intval = fnvpair_value_uint64(pair);
2474	}
2475
2476	switch (prop) {
2477	case ZFS_PROP_QUOTA:
2478		err = dsl_dir_set_quota(dsname, source, intval);
2479		break;
2480	case ZFS_PROP_REFQUOTA:
2481		err = dsl_dataset_set_refquota(dsname, source, intval);
2482		break;
2483	case ZFS_PROP_FILESYSTEM_LIMIT:
2484	case ZFS_PROP_SNAPSHOT_LIMIT:
2485		if (intval == UINT64_MAX) {
2486			/* clearing the limit, just do it */
2487			err = 0;
2488		} else {
2489			err = dsl_dir_activate_fs_ss_limit(dsname);
2490		}
2491		/*
2492		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2493		 * default path to set the value in the nvlist.
2494		 */
2495		if (err == 0)
2496			err = -1;
2497		break;
2498	case ZFS_PROP_KEYLOCATION:
2499		err = dsl_crypto_can_set_keylocation(dsname, strval);
2500
2501		/*
2502		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2503		 * default path to set the value in the nvlist.
2504		 */
2505		if (err == 0)
2506			err = -1;
2507		break;
2508	case ZFS_PROP_RESERVATION:
2509		err = dsl_dir_set_reservation(dsname, source, intval);
2510		break;
2511	case ZFS_PROP_REFRESERVATION:
2512		err = dsl_dataset_set_refreservation(dsname, source, intval);
2513		break;
2514	case ZFS_PROP_COMPRESSION:
2515		err = dsl_dataset_set_compression(dsname, source, intval);
2516		/*
2517		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2518		 * default path to set the value in the nvlist.
2519		 */
2520		if (err == 0)
2521			err = -1;
2522		break;
2523	case ZFS_PROP_VOLSIZE:
2524		err = zvol_set_volsize(dsname, intval);
2525		break;
2526	case ZFS_PROP_VOLTHREADING:
2527		err = zvol_set_volthreading(dsname, intval);
2528		/*
2529		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2530		 * default path to set the value in the nvlist.
2531		 */
2532		if (err == 0)
2533			err = -1;
2534		break;
2535	case ZFS_PROP_SNAPDEV:
2536	case ZFS_PROP_VOLMODE:
2537		err = zvol_set_common(dsname, prop, source, intval);
2538		break;
2539	case ZFS_PROP_READONLY:
2540		err = zvol_set_ro(dsname, intval);
2541		/*
2542		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2543		 * default path to set the value in the nvlist.
2544		 */
2545		if (err == 0)
2546			err = -1;
2547		break;
2548	case ZFS_PROP_VERSION:
2549	{
2550		zfsvfs_t *zfsvfs;
2551
2552		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2553			break;
2554
2555		err = zfs_set_version(zfsvfs, intval);
2556		zfsvfs_rele(zfsvfs, FTAG);
2557
2558		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2559			zfs_cmd_t *zc;
2560
2561			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2562			(void) strlcpy(zc->zc_name, dsname,
2563			    sizeof (zc->zc_name));
2564			(void) zfs_ioc_userspace_upgrade(zc);
2565			(void) zfs_ioc_id_quota_upgrade(zc);
2566			kmem_free(zc, sizeof (zfs_cmd_t));
2567		}
2568		break;
2569	}
2570	default:
2571		err = -1;
2572	}
2573
2574	return (err);
2575}
2576
2577static boolean_t
2578zfs_is_namespace_prop(zfs_prop_t prop)
2579{
2580	switch (prop) {
2581
2582	case ZFS_PROP_ATIME:
2583	case ZFS_PROP_RELATIME:
2584	case ZFS_PROP_DEVICES:
2585	case ZFS_PROP_EXEC:
2586	case ZFS_PROP_SETUID:
2587	case ZFS_PROP_READONLY:
2588	case ZFS_PROP_XATTR:
2589	case ZFS_PROP_NBMAND:
2590		return (B_TRUE);
2591
2592	default:
2593		return (B_FALSE);
2594	}
2595}
2596
2597/*
2598 * This function is best effort. If it fails to set any of the given properties,
2599 * it continues to set as many as it can and returns the last error
2600 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2601 * with the list of names of all the properties that failed along with the
2602 * corresponding error numbers.
2603 *
2604 * If every property is set successfully, zero is returned and errlist is not
2605 * modified.
2606 */
2607int
2608zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2609    nvlist_t *errlist)
2610{
2611	nvpair_t *pair;
2612	nvpair_t *propval;
2613	int rv = 0;
2614	int err;
2615	uint64_t intval;
2616	const char *strval;
2617	boolean_t should_update_mount_cache = B_FALSE;
2618
2619	nvlist_t *genericnvl = fnvlist_alloc();
2620	nvlist_t *retrynvl = fnvlist_alloc();
2621retry:
2622	pair = NULL;
2623	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2624		const char *propname = nvpair_name(pair);
2625		zfs_prop_t prop = zfs_name_to_prop(propname);
2626		err = 0;
2627
2628		/* decode the property value */
2629		propval = pair;
2630		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2631			nvlist_t *attrs;
2632			attrs = fnvpair_value_nvlist(pair);
2633			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2634			    &propval) != 0)
2635				err = SET_ERROR(EINVAL);
2636		}
2637
2638		/* Validate value type */
2639		if (err == 0 && source == ZPROP_SRC_INHERITED) {
2640			/* inherited properties are expected to be booleans */
2641			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
2642				err = SET_ERROR(EINVAL);
2643		} else if (err == 0 && prop == ZPROP_USERPROP) {
2644			if (zfs_prop_user(propname)) {
2645				if (nvpair_type(propval) != DATA_TYPE_STRING)
2646					err = SET_ERROR(EINVAL);
2647			} else if (zfs_prop_userquota(propname)) {
2648				if (nvpair_type(propval) !=
2649				    DATA_TYPE_UINT64_ARRAY)
2650					err = SET_ERROR(EINVAL);
2651			} else {
2652				err = SET_ERROR(EINVAL);
2653			}
2654		} else if (err == 0) {
2655			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2656				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2657					err = SET_ERROR(EINVAL);
2658			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2659				const char *unused;
2660
2661				intval = fnvpair_value_uint64(propval);
2662
2663				switch (zfs_prop_get_type(prop)) {
2664				case PROP_TYPE_NUMBER:
2665					break;
2666				case PROP_TYPE_STRING:
2667					err = SET_ERROR(EINVAL);
2668					break;
2669				case PROP_TYPE_INDEX:
2670					if (zfs_prop_index_to_string(prop,
2671					    intval, &unused) != 0)
2672						err =
2673						    SET_ERROR(ZFS_ERR_BADPROP);
2674					break;
2675				default:
2676					cmn_err(CE_PANIC,
2677					    "unknown property type");
2678				}
2679			} else {
2680				err = SET_ERROR(EINVAL);
2681			}
2682		}
2683
2684		/* Validate permissions */
2685		if (err == 0)
2686			err = zfs_check_settable(dsname, pair, CRED());
2687
2688		if (err == 0) {
2689			if (source == ZPROP_SRC_INHERITED)
2690				err = -1; /* does not need special handling */
2691			else
2692				err = zfs_prop_set_special(dsname, source,
2693				    pair);
2694			if (err == -1) {
2695				/*
2696				 * For better performance we build up a list of
2697				 * properties to set in a single transaction.
2698				 */
2699				err = nvlist_add_nvpair(genericnvl, pair);
2700			} else if (err != 0 && nvl != retrynvl) {
2701				/*
2702				 * This may be a spurious error caused by
2703				 * receiving quota and reservation out of order.
2704				 * Try again in a second pass.
2705				 */
2706				err = nvlist_add_nvpair(retrynvl, pair);
2707			}
2708		}
2709
2710		if (err != 0) {
2711			if (errlist != NULL)
2712				fnvlist_add_int32(errlist, propname, err);
2713			rv = err;
2714		}
2715
2716		if (zfs_is_namespace_prop(prop))
2717			should_update_mount_cache = B_TRUE;
2718	}
2719
2720	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2721		nvl = retrynvl;
2722		goto retry;
2723	}
2724
2725	if (nvlist_empty(genericnvl))
2726		goto out;
2727
2728	/*
2729	 * Try to set them all in one batch.
2730	 */
2731	err = dsl_props_set(dsname, source, genericnvl);
2732	if (err == 0)
2733		goto out;
2734
2735	/*
2736	 * If batching fails, we still want to set as many properties as we
2737	 * can, so try setting them individually.
2738	 */
2739	pair = NULL;
2740	while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2741		const char *propname = nvpair_name(pair);
2742
2743		propval = pair;
2744		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2745			nvlist_t *attrs;
2746			attrs = fnvpair_value_nvlist(pair);
2747			propval = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
2748		}
2749
2750		if (nvpair_type(propval) == DATA_TYPE_STRING) {
2751			strval = fnvpair_value_string(propval);
2752			err = dsl_prop_set_string(dsname, propname,
2753			    source, strval);
2754		} else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
2755			err = dsl_prop_inherit(dsname, propname, source);
2756		} else {
2757			intval = fnvpair_value_uint64(propval);
2758			err = dsl_prop_set_int(dsname, propname, source,
2759			    intval);
2760		}
2761
2762		if (err != 0) {
2763			if (errlist != NULL) {
2764				fnvlist_add_int32(errlist, propname, err);
2765			}
2766			rv = err;
2767		}
2768	}
2769
2770out:
2771	if (should_update_mount_cache)
2772		zfs_ioctl_update_mount_cache(dsname);
2773
2774	nvlist_free(genericnvl);
2775	nvlist_free(retrynvl);
2776
2777	return (rv);
2778}
2779
2780/*
2781 * Check that all the properties are valid user properties.
2782 */
2783static int
2784zfs_check_userprops(nvlist_t *nvl)
2785{
2786	nvpair_t *pair = NULL;
2787
2788	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2789		const char *propname = nvpair_name(pair);
2790
2791		if (!zfs_prop_user(propname) ||
2792		    nvpair_type(pair) != DATA_TYPE_STRING)
2793			return (SET_ERROR(EINVAL));
2794
2795		if (strlen(propname) >= ZAP_MAXNAMELEN)
2796			return (SET_ERROR(ENAMETOOLONG));
2797
2798		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2799			return (SET_ERROR(E2BIG));
2800	}
2801	return (0);
2802}
2803
2804static void
2805props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2806{
2807	nvpair_t *pair;
2808
2809	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2810
2811	pair = NULL;
2812	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2813		if (nvlist_exists(skipped, nvpair_name(pair)))
2814			continue;
2815
2816		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2817	}
2818}
2819
2820static int
2821clear_received_props(const char *dsname, nvlist_t *props,
2822    nvlist_t *skipped)
2823{
2824	int err = 0;
2825	nvlist_t *cleared_props = NULL;
2826	props_skip(props, skipped, &cleared_props);
2827	if (!nvlist_empty(cleared_props)) {
2828		/*
2829		 * Acts on local properties until the dataset has received
2830		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2831		 */
2832		zprop_source_t flags = (ZPROP_SRC_NONE |
2833		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2834		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2835	}
2836	nvlist_free(cleared_props);
2837	return (err);
2838}
2839
2840/*
2841 * inputs:
2842 * zc_name		name of filesystem
2843 * zc_value		name of property to set
2844 * zc_nvlist_src{_size}	nvlist of properties to apply
2845 * zc_cookie		received properties flag
2846 *
2847 * outputs:
2848 * zc_nvlist_dst{_size} error for each unapplied received property
2849 */
2850static int
2851zfs_ioc_set_prop(zfs_cmd_t *zc)
2852{
2853	nvlist_t *nvl;
2854	boolean_t received = zc->zc_cookie;
2855	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2856	    ZPROP_SRC_LOCAL);
2857	nvlist_t *errors;
2858	int error;
2859
2860	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2861	    zc->zc_iflags, &nvl)) != 0)
2862		return (error);
2863
2864	if (received) {
2865		nvlist_t *origprops;
2866
2867		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2868			(void) clear_received_props(zc->zc_name,
2869			    origprops, nvl);
2870			nvlist_free(origprops);
2871		}
2872
2873		error = dsl_prop_set_hasrecvd(zc->zc_name);
2874	}
2875
2876	errors = fnvlist_alloc();
2877	if (error == 0)
2878		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2879
2880	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2881		(void) put_nvlist(zc, errors);
2882	}
2883
2884	nvlist_free(errors);
2885	nvlist_free(nvl);
2886	return (error);
2887}
2888
2889/*
2890 * inputs:
2891 * zc_name		name of filesystem
2892 * zc_value		name of property to inherit
2893 * zc_cookie		revert to received value if TRUE
2894 *
2895 * outputs:		none
2896 */
2897static int
2898zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2899{
2900	const char *propname = zc->zc_value;
2901	zfs_prop_t prop = zfs_name_to_prop(propname);
2902	boolean_t received = zc->zc_cookie;
2903	zprop_source_t source = (received
2904	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2905	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2906	nvlist_t *dummy;
2907	nvpair_t *pair;
2908	zprop_type_t type;
2909	int err;
2910
2911	if (!received) {
2912		/*
2913		 * Only check this in the non-received case. We want to allow
2914		 * 'inherit -S' to revert non-inheritable properties like quota
2915		 * and reservation to the received or default values even though
2916		 * they are not considered inheritable.
2917		 */
2918		if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
2919			return (SET_ERROR(EINVAL));
2920	}
2921
2922	if (prop == ZPROP_USERPROP) {
2923		if (!zfs_prop_user(propname))
2924			return (SET_ERROR(EINVAL));
2925
2926		type = PROP_TYPE_STRING;
2927	} else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
2928		return (SET_ERROR(EINVAL));
2929	} else {
2930		type = zfs_prop_get_type(prop);
2931	}
2932
2933	/*
2934	 * zfs_prop_set_special() expects properties in the form of an
2935	 * nvpair with type info.
2936	 */
2937	dummy = fnvlist_alloc();
2938
2939	switch (type) {
2940	case PROP_TYPE_STRING:
2941		VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2942		break;
2943	case PROP_TYPE_NUMBER:
2944	case PROP_TYPE_INDEX:
2945		VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2946		break;
2947	default:
2948		err = SET_ERROR(EINVAL);
2949		goto errout;
2950	}
2951
2952	pair = nvlist_next_nvpair(dummy, NULL);
2953	if (pair == NULL) {
2954		err = SET_ERROR(EINVAL);
2955	} else {
2956		err = zfs_prop_set_special(zc->zc_name, source, pair);
2957		if (err == -1) /* property is not "special", needs handling */
2958			err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
2959			    source);
2960	}
2961
2962errout:
2963	nvlist_free(dummy);
2964	return (err);
2965}
2966
2967static int
2968zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2969{
2970	nvlist_t *props;
2971	spa_t *spa;
2972	int error;
2973	nvpair_t *pair;
2974
2975	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2976	    zc->zc_iflags, &props)))
2977		return (error);
2978
2979	/*
2980	 * If the only property is the configfile, then just do a spa_lookup()
2981	 * to handle the faulted case.
2982	 */
2983	pair = nvlist_next_nvpair(props, NULL);
2984	if (pair != NULL && strcmp(nvpair_name(pair),
2985	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2986	    nvlist_next_nvpair(props, pair) == NULL) {
2987		mutex_enter(&spa_namespace_lock);
2988		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2989			spa_configfile_set(spa, props, B_FALSE);
2990			spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
2991		}
2992		mutex_exit(&spa_namespace_lock);
2993		if (spa != NULL) {
2994			nvlist_free(props);
2995			return (0);
2996		}
2997	}
2998
2999	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3000		nvlist_free(props);
3001		return (error);
3002	}
3003
3004	error = spa_prop_set(spa, props);
3005
3006	nvlist_free(props);
3007	spa_close(spa, FTAG);
3008
3009	return (error);
3010}
3011
3012static int
3013zfs_ioc_pool_get_props(zfs_cmd_t *zc)
3014{
3015	spa_t *spa;
3016	int error;
3017	nvlist_t *nvp = NULL;
3018
3019	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3020		/*
3021		 * If the pool is faulted, there may be properties we can still
3022		 * get (such as altroot and cachefile), so attempt to get them
3023		 * anyway.
3024		 */
3025		mutex_enter(&spa_namespace_lock);
3026		if ((spa = spa_lookup(zc->zc_name)) != NULL)
3027			error = spa_prop_get(spa, &nvp);
3028		mutex_exit(&spa_namespace_lock);
3029	} else {
3030		error = spa_prop_get(spa, &nvp);
3031		spa_close(spa, FTAG);
3032	}
3033
3034	if (error == 0 && zc->zc_nvlist_dst != 0)
3035		error = put_nvlist(zc, nvp);
3036	else
3037		error = SET_ERROR(EFAULT);
3038
3039	nvlist_free(nvp);
3040	return (error);
3041}
3042
/*
 * Keys accepted in the input nvlist of zfs_ioc_vdev_set_props().
 *
 * innvl: {
 *     "vdevprops_set_vdev" -> guid
 *     "vdevprops_set_props" -> { prop -> value }
 * }
 *
 * outnvl: propname -> error code (int32)
 *
 * Each row below reads as { key name, nvpair data type, flags }; both keys
 * carry no flags (0).  NOTE(review): zfs_ioc_key_t is declared outside this
 * view, so confirm the field meanings there.
 */
static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = {
	{ZPOOL_VDEV_PROPS_SET_VDEV,	DATA_TYPE_UINT64,	0},
	{ZPOOL_VDEV_PROPS_SET_PROPS,	DATA_TYPE_NVLIST,	0}
};
3055
3056static int
3057zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3058{
3059	spa_t *spa;
3060	int error;
3061	vdev_t *vd;
3062	uint64_t vdev_guid;
3063
3064	/* Early validation */
3065	if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
3066	    &vdev_guid) != 0)
3067		return (SET_ERROR(EINVAL));
3068
3069	if (outnvl == NULL)
3070		return (SET_ERROR(EINVAL));
3071
3072	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3073		return (error);
3074
3075	ASSERT(spa_writeable(spa));
3076
3077	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3078		spa_close(spa, FTAG);
3079		return (SET_ERROR(ENOENT));
3080	}
3081
3082	error = vdev_prop_set(vd, innvl, outnvl);
3083
3084	spa_close(spa, FTAG);
3085
3086	return (error);
3087}
3088
/*
 * Keys accepted in the input nvlist of zfs_ioc_vdev_get_props().
 *
 * innvl: {
 *     "vdevprops_get_vdev" -> guid
 *     (optional) "vdevprops_get_props" -> { propname -> propid }
 * }
 *
 * outnvl: propname -> value
 *
 * Each row below reads as { key name, nvpair data type, flags }; the
 * ZK_OPTIONAL flag is set on the key documented as optional above.
 * NOTE(review): zfs_ioc_key_t is declared outside this view, so confirm the
 * field meanings there.
 */
static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = {
	{ZPOOL_VDEV_PROPS_GET_VDEV,	DATA_TYPE_UINT64,	0},
	{ZPOOL_VDEV_PROPS_GET_PROPS,	DATA_TYPE_NVLIST,	ZK_OPTIONAL}
};
3101
3102static int
3103zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3104{
3105	spa_t *spa;
3106	int error;
3107	vdev_t *vd;
3108	uint64_t vdev_guid;
3109
3110	/* Early validation */
3111	if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
3112	    &vdev_guid) != 0)
3113		return (SET_ERROR(EINVAL));
3114
3115	if (outnvl == NULL)
3116		return (SET_ERROR(EINVAL));
3117
3118	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3119		return (error);
3120
3121	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3122		spa_close(spa, FTAG);
3123		return (SET_ERROR(ENOENT));
3124	}
3125
3126	error = vdev_prop_get(vd, innvl, outnvl);
3127
3128	spa_close(spa, FTAG);
3129
3130	return (error);
3131}
3132
3133/*
3134 * inputs:
3135 * zc_name		name of filesystem
3136 * zc_nvlist_src{_size}	nvlist of delegated permissions
3137 * zc_perm_action	allow/unallow flag
3138 *
3139 * outputs:		none
3140 */
3141static int
3142zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3143{
3144	int error;
3145	nvlist_t *fsaclnv = NULL;
3146
3147	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3148	    zc->zc_iflags, &fsaclnv)) != 0)
3149		return (error);
3150
3151	/*
3152	 * Verify nvlist is constructed correctly
3153	 */
3154	if (zfs_deleg_verify_nvlist(fsaclnv) != 0) {
3155		nvlist_free(fsaclnv);
3156		return (SET_ERROR(EINVAL));
3157	}
3158
3159	/*
3160	 * If we don't have PRIV_SYS_MOUNT, then validate
3161	 * that user is allowed to hand out each permission in
3162	 * the nvlist(s)
3163	 */
3164
3165	error = secpolicy_zfs(CRED());
3166	if (error != 0) {
3167		if (zc->zc_perm_action == B_FALSE) {
3168			error = dsl_deleg_can_allow(zc->zc_name,
3169			    fsaclnv, CRED());
3170		} else {
3171			error = dsl_deleg_can_unallow(zc->zc_name,
3172			    fsaclnv, CRED());
3173		}
3174	}
3175
3176	if (error == 0)
3177		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3178
3179	nvlist_free(fsaclnv);
3180	return (error);
3181}
3182
3183/*
3184 * inputs:
3185 * zc_name		name of filesystem
3186 *
3187 * outputs:
3188 * zc_nvlist_src{_size}	nvlist of delegated permissions
3189 */
3190static int
3191zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3192{
3193	nvlist_t *nvp;
3194	int error;
3195
3196	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3197		error = put_nvlist(zc, nvp);
3198		nvlist_free(nvp);
3199	}
3200
3201	return (error);
3202}
3203
3204static void
3205zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3206{
3207	zfs_creat_t *zct = arg;
3208
3209	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3210}
3211
3212#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3213
3214/*
3215 * inputs:
3216 * os			parent objset pointer (NULL if root fs)
3217 * fuids_ok		fuids allowed in this version of the spa?
3218 * sa_ok		SAs allowed in this version of the spa?
3219 * createprops		list of properties requested by creator
3220 *
3221 * outputs:
3222 * zplprops	values for the zplprops we attach to the master node object
3223 * is_ci	true if requested file system will be purely case-insensitive
3224 *
3225 * Determine the settings for utf8only, normalization and
3226 * casesensitivity.  Specific values may have been requested by the
3227 * creator and/or we can inherit values from the parent dataset.  If
3228 * the file system is of too early a vintage, a creator can not
3229 * request settings for these properties, even if the requested
3230 * setting is the default value.  We don't actually want to create dsl
3231 * properties for these, so remove them from the source nvlist after
3232 * processing.
3233 */
3234static int
3235zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3236    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3237    nvlist_t *zplprops, boolean_t *is_ci)
3238{
3239	uint64_t sense = ZFS_PROP_UNDEFINED;
3240	uint64_t norm = ZFS_PROP_UNDEFINED;
3241	uint64_t u8 = ZFS_PROP_UNDEFINED;
3242	int error;
3243
3244	ASSERT(zplprops != NULL);
3245
3246	/* parent dataset must be a filesystem */
3247	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3248		return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3249
3250	/*
3251	 * Pull out creator prop choices, if any.
3252	 */
3253	if (createprops) {
3254		(void) nvlist_lookup_uint64(createprops,
3255		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3256		(void) nvlist_lookup_uint64(createprops,
3257		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3258		(void) nvlist_remove_all(createprops,
3259		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3260		(void) nvlist_lookup_uint64(createprops,
3261		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3262		(void) nvlist_remove_all(createprops,
3263		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3264		(void) nvlist_lookup_uint64(createprops,
3265		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3266		(void) nvlist_remove_all(createprops,
3267		    zfs_prop_to_name(ZFS_PROP_CASE));
3268	}
3269
3270	/*
3271	 * If the zpl version requested is whacky or the file system
3272	 * or pool is version is too "young" to support normalization
3273	 * and the creator tried to set a value for one of the props,
3274	 * error out.
3275	 */
3276	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3277	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3278	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3279	    (zplver < ZPL_VERSION_NORMALIZATION &&
3280	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3281	    sense != ZFS_PROP_UNDEFINED)))
3282		return (SET_ERROR(ENOTSUP));
3283
3284	/*
3285	 * Put the version in the zplprops
3286	 */
3287	VERIFY(nvlist_add_uint64(zplprops,
3288	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3289
3290	if (norm == ZFS_PROP_UNDEFINED &&
3291	    (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3292		return (error);
3293	VERIFY(nvlist_add_uint64(zplprops,
3294	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3295
3296	/*
3297	 * If we're normalizing, names must always be valid UTF-8 strings.
3298	 */
3299	if (norm)
3300		u8 = 1;
3301	if (u8 == ZFS_PROP_UNDEFINED &&
3302	    (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3303		return (error);
3304	VERIFY(nvlist_add_uint64(zplprops,
3305	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3306
3307	if (sense == ZFS_PROP_UNDEFINED &&
3308	    (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3309		return (error);
3310	VERIFY(nvlist_add_uint64(zplprops,
3311	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3312
3313	if (is_ci)
3314		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3315
3316	return (0);
3317}
3318
3319static int
3320zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3321    nvlist_t *zplprops, boolean_t *is_ci)
3322{
3323	boolean_t fuids_ok, sa_ok;
3324	uint64_t zplver = ZPL_VERSION;
3325	objset_t *os = NULL;
3326	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3327	spa_t *spa;
3328	uint64_t spa_vers;
3329	int error;
3330
3331	zfs_get_parent(dataset, parentname, sizeof (parentname));
3332
3333	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3334		return (error);
3335
3336	spa_vers = spa_version(spa);
3337	spa_close(spa, FTAG);
3338
3339	zplver = zfs_zpl_version_map(spa_vers);
3340	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3341	sa_ok = (zplver >= ZPL_VERSION_SA);
3342
3343	/*
3344	 * Open parent object set so we can inherit zplprop values.
3345	 */
3346	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3347		return (error);
3348
3349	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3350	    zplprops, is_ci);
3351	dmu_objset_rele(os, FTAG);
3352	return (error);
3353}
3354
3355static int
3356zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3357    nvlist_t *zplprops, boolean_t *is_ci)
3358{
3359	boolean_t fuids_ok;
3360	boolean_t sa_ok;
3361	uint64_t zplver = ZPL_VERSION;
3362	int error;
3363
3364	zplver = zfs_zpl_version_map(spa_vers);
3365	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3366	sa_ok = (zplver >= ZPL_VERSION_SA);
3367
3368	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3369	    createprops, zplprops, is_ci);
3370	return (error);
3371}
3372
/*
 * Keys accepted in the input nvlist of zfs_ioc_create().
 *
 * innvl: {
 *     "type" -> dmu_objset_type_t (int32)
 *     (optional) "props" -> { prop -> value }
 *     (optional) "hidden_args" -> { "wkeydata" -> value }
 *         raw uint8_t array of encryption wrapping key data (32 bytes)
 * }
 *
 * outnvl: propname -> error code (int32)
 *
 * Each row below reads as { key name, nvpair data type, flags }; the
 * ZK_OPTIONAL flag is set on the keys documented as optional above.
 * NOTE(review): zfs_ioc_key_t is declared outside this view, so confirm the
 * field meanings there.
 */

static const zfs_ioc_key_t zfs_keys_create[] = {
	{"type",	DATA_TYPE_INT32,	0},
	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
3389
3390static int
3391zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3392{
3393	int error = 0;
3394	zfs_creat_t zct = { 0 };
3395	nvlist_t *nvprops = NULL;
3396	nvlist_t *hidden_args = NULL;
3397	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3398	dmu_objset_type_t type;
3399	boolean_t is_insensitive = B_FALSE;
3400	dsl_crypto_params_t *dcp = NULL;
3401
3402	type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3403	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3404	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3405
3406	switch (type) {
3407	case DMU_OST_ZFS:
3408		cbfunc = zfs_create_cb;
3409		break;
3410
3411	case DMU_OST_ZVOL:
3412		cbfunc = zvol_create_cb;
3413		break;
3414
3415	default:
3416		cbfunc = NULL;
3417		break;
3418	}
3419	if (strchr(fsname, '@') ||
3420	    strchr(fsname, '%'))
3421		return (SET_ERROR(EINVAL));
3422
3423	zct.zct_props = nvprops;
3424
3425	if (cbfunc == NULL)
3426		return (SET_ERROR(EINVAL));
3427
3428	if (type == DMU_OST_ZVOL) {
3429		uint64_t volsize, volblocksize;
3430
3431		if (nvprops == NULL)
3432			return (SET_ERROR(EINVAL));
3433		if (nvlist_lookup_uint64(nvprops,
3434		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3435			return (SET_ERROR(EINVAL));
3436
3437		if ((error = nvlist_lookup_uint64(nvprops,
3438		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3439		    &volblocksize)) != 0 && error != ENOENT)
3440			return (SET_ERROR(EINVAL));
3441
3442		if (error != 0)
3443			volblocksize = zfs_prop_default_numeric(
3444			    ZFS_PROP_VOLBLOCKSIZE);
3445
3446		if ((error = zvol_check_volblocksize(fsname,
3447		    volblocksize)) != 0 ||
3448		    (error = zvol_check_volsize(volsize,
3449		    volblocksize)) != 0)
3450			return (error);
3451	} else if (type == DMU_OST_ZFS) {
3452		int error;
3453
3454		/*
3455		 * We have to have normalization and
3456		 * case-folding flags correct when we do the
3457		 * file system creation, so go figure them out
3458		 * now.
3459		 */
3460		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3461		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3462		error = zfs_fill_zplprops(fsname, nvprops,
3463		    zct.zct_zplprops, &is_insensitive);
3464		if (error != 0) {
3465			nvlist_free(zct.zct_zplprops);
3466			return (error);
3467		}
3468	}
3469
3470	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3471	    hidden_args, &dcp);
3472	if (error != 0) {
3473		nvlist_free(zct.zct_zplprops);
3474		return (error);
3475	}
3476
3477	error = dmu_objset_create(fsname, type,
3478	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3479
3480	nvlist_free(zct.zct_zplprops);
3481	dsl_crypto_params_free(dcp, !!error);
3482
3483	/*
3484	 * It would be nice to do this atomically.
3485	 */
3486	if (error == 0) {
3487		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3488		    nvprops, outnvl);
3489		if (error != 0) {
3490			spa_t *spa;
3491			int error2;
3492
3493			/*
3494			 * Volumes will return EBUSY and cannot be destroyed
3495			 * until all asynchronous minor handling (e.g. from
3496			 * setting the volmode property) has completed. Wait for
3497			 * the spa_zvol_taskq to drain then retry.
3498			 */
3499			error2 = dsl_destroy_head(fsname);
3500			while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3501				error2 = spa_open(fsname, &spa, FTAG);
3502				if (error2 == 0) {
3503					taskq_wait(spa->spa_zvol_taskq);
3504					spa_close(spa, FTAG);
3505				}
3506				error2 = dsl_destroy_head(fsname);
3507			}
3508		}
3509	}
3510	return (error);
3511}
3512
3513/*
3514 * innvl: {
3515 *     "origin" -> name of origin snapshot
3516 *     (optional) "props" -> { prop -> value }
3517 *     (optional) "hidden_args" -> { "wkeydata" -> value }
3518 *         raw uint8_t array of encryption wrapping key data (32 bytes)
3519 * }
3520 *
3521 * outputs:
3522 * outnvl: propname -> error code (int32)
3523 */
3524static const zfs_ioc_key_t zfs_keys_clone[] = {
3525	{"origin",	DATA_TYPE_STRING,	0},
3526	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3527	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3528};
3529
3530static int
3531zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3532{
3533	int error = 0;
3534	nvlist_t *nvprops = NULL;
3535	const char *origin_name;
3536
3537	origin_name = fnvlist_lookup_string(innvl, "origin");
3538	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3539
3540	if (strchr(fsname, '@') ||
3541	    strchr(fsname, '%'))
3542		return (SET_ERROR(EINVAL));
3543
3544	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3545		return (SET_ERROR(EINVAL));
3546
3547	error = dmu_objset_clone(fsname, origin_name);
3548
3549	/*
3550	 * It would be nice to do this atomically.
3551	 */
3552	if (error == 0) {
3553		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3554		    nvprops, outnvl);
3555		if (error != 0)
3556			(void) dsl_destroy_head(fsname);
3557	}
3558	return (error);
3559}
3560
3561static const zfs_ioc_key_t zfs_keys_remap[] = {
3562	/* no nvl keys */
3563};
3564
3565static int
3566zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3567{
3568	/* This IOCTL is no longer supported. */
3569	(void) fsname, (void) innvl, (void) outnvl;
3570	return (0);
3571}
3572
3573/*
3574 * innvl: {
3575 *     "snaps" -> { snapshot1, snapshot2 }
3576 *     (optional) "props" -> { prop -> value (string) }
3577 * }
3578 *
3579 * outnvl: snapshot -> error code (int32)
3580 */
3581static const zfs_ioc_key_t zfs_keys_snapshot[] = {
3582	{"snaps",	DATA_TYPE_NVLIST,	0},
3583	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3584};
3585
3586static int
3587zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3588{
3589	nvlist_t *snaps;
3590	nvlist_t *props = NULL;
3591	int error, poollen;
3592	nvpair_t *pair;
3593
3594	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3595	if (!nvlist_empty(props) &&
3596	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3597		return (SET_ERROR(ENOTSUP));
3598	if ((error = zfs_check_userprops(props)) != 0)
3599		return (error);
3600
3601	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3602	poollen = strlen(poolname);
3603	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3604	    pair = nvlist_next_nvpair(snaps, pair)) {
3605		const char *name = nvpair_name(pair);
3606		char *cp = strchr(name, '@');
3607
3608		/*
3609		 * The snap name must contain an @, and the part after it must
3610		 * contain only valid characters.
3611		 */
3612		if (cp == NULL ||
3613		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3614			return (SET_ERROR(EINVAL));
3615
3616		/*
3617		 * The snap must be in the specified pool.
3618		 */
3619		if (strncmp(name, poolname, poollen) != 0 ||
3620		    (name[poollen] != '/' && name[poollen] != '@'))
3621			return (SET_ERROR(EXDEV));
3622
3623		/*
3624		 * Check for permission to set the properties on the fs.
3625		 */
3626		if (!nvlist_empty(props)) {
3627			*cp = '\0';
3628			error = zfs_secpolicy_write_perms(name,
3629			    ZFS_DELEG_PERM_USERPROP, CRED());
3630			*cp = '@';
3631			if (error != 0)
3632				return (error);
3633		}
3634
3635		/* This must be the only snap of this fs. */
3636		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3637		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3638			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3639			    == 0) {
3640				return (SET_ERROR(EXDEV));
3641			}
3642		}
3643	}
3644
3645	error = dsl_dataset_snapshot(snaps, props, outnvl);
3646
3647	return (error);
3648}
3649
3650/*
3651 * innvl: "message" -> string
3652 */
3653static const zfs_ioc_key_t zfs_keys_log_history[] = {
3654	{"message",	DATA_TYPE_STRING,	0},
3655};
3656
3657static int
3658zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3659{
3660	(void) unused, (void) outnvl;
3661	const char *message;
3662	char *poolname;
3663	spa_t *spa;
3664	int error;
3665
3666	/*
3667	 * The poolname in the ioctl is not set, we get it from the TSD,
3668	 * which was set at the end of the last successful ioctl that allows
3669	 * logging.  The secpolicy func already checked that it is set.
3670	 * Only one log ioctl is allowed after each successful ioctl, so
3671	 * we clear the TSD here.
3672	 */
3673	poolname = tsd_get(zfs_allow_log_key);
3674	if (poolname == NULL)
3675		return (SET_ERROR(EINVAL));
3676	(void) tsd_set(zfs_allow_log_key, NULL);
3677	error = spa_open(poolname, &spa, FTAG);
3678	kmem_strfree(poolname);
3679	if (error != 0)
3680		return (error);
3681
3682	message = fnvlist_lookup_string(innvl, "message");
3683
3684	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3685		spa_close(spa, FTAG);
3686		return (SET_ERROR(ENOTSUP));
3687	}
3688
3689	error = spa_history_log(spa, message);
3690	spa_close(spa, FTAG);
3691	return (error);
3692}
3693
3694/*
3695 * This ioctl is used to set the bootenv configuration on the current
3696 * pool. This configuration is stored in the second padding area of the label,
3697 * and it is used by the bootloader(s) to store the bootloader and/or system
3698 * specific data.
3699 * The data is stored as nvlist data stream, and is protected by
3700 * an embedded checksum.
3701 * The version can have two possible values:
3702 * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
3703 * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
3704 */
3705static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
3706	{"version",	DATA_TYPE_UINT64,	0},
3707	{"<keys>",	DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
3708};
3709
3710static int
3711zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3712{
3713	int error;
3714	spa_t *spa;
3715
3716	if ((error = spa_open(name, &spa, FTAG)) != 0)
3717		return (error);
3718	spa_vdev_state_enter(spa, SCL_ALL);
3719	error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
3720	(void) spa_vdev_state_exit(spa, NULL, 0);
3721	spa_close(spa, FTAG);
3722	return (error);
3723}
3724
3725static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
3726	/* no nvl keys */
3727};
3728
3729static int
3730zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3731{
3732	spa_t *spa;
3733	int error;
3734
3735	if ((error = spa_open(name, &spa, FTAG)) != 0)
3736		return (error);
3737	spa_vdev_state_enter(spa, SCL_ALL);
3738	error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
3739	(void) spa_vdev_state_exit(spa, NULL, 0);
3740	spa_close(spa, FTAG);
3741	return (error);
3742}
3743
3744/*
3745 * The dp_config_rwlock must not be held when calling this, because the
3746 * unmount may need to write out data.
3747 *
3748 * This function is best-effort.  Callers must deal gracefully if it
3749 * remains mounted (or is remounted after this call).
3750 *
 * Nothing is returned.  If the argument is not a snapshot name (it contains
 * no '@') this is a no-op; otherwise a forced unmount is attempted and its
 * result is deliberately ignored.
3753 */
3754void
3755zfs_unmount_snap(const char *snapname)
3756{
3757	if (strchr(snapname, '@') == NULL)
3758		return;
3759
3760	(void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
3761}
3762
/*
 * Callback-shaped wrapper around zfs_unmount_snap().  The extra argument
 * is ignored and the callback always returns 0.
 */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	(void) arg;

	zfs_unmount_snap(snapname);

	return (0);
}
3770
3771/*
3772 * When a clone is destroyed, its origin may also need to be destroyed,
3773 * in which case it must be unmounted.  This routine will do that unmount
3774 * if necessary.
3775 */
3776void
3777zfs_destroy_unmount_origin(const char *fsname)
3778{
3779	int error;
3780	objset_t *os;
3781	dsl_dataset_t *ds;
3782
3783	error = dmu_objset_hold(fsname, FTAG, &os);
3784	if (error != 0)
3785		return;
3786	ds = dmu_objset_ds(os);
3787	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3788		char originname[ZFS_MAX_DATASET_NAME_LEN];
3789		dsl_dataset_name(ds->ds_prev, originname);
3790		dmu_objset_rele(os, FTAG);
3791		zfs_unmount_snap(originname);
3792	} else {
3793		dmu_objset_rele(os, FTAG);
3794	}
3795}
3796
3797/*
3798 * innvl: {
3799 *     "snaps" -> { snapshot1, snapshot2 }
3800 *     (optional boolean) "defer"
3801 * }
3802 *
3803 * outnvl: snapshot -> error code (int32)
3804 */
3805static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
3806	{"snaps",	DATA_TYPE_NVLIST,	0},
3807	{"defer",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
3808};
3809
3810static int
3811zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3812{
3813	int poollen;
3814	nvlist_t *snaps;
3815	nvpair_t *pair;
3816	boolean_t defer;
3817	spa_t *spa;
3818
3819	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3820	defer = nvlist_exists(innvl, "defer");
3821
3822	poollen = strlen(poolname);
3823	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3824	    pair = nvlist_next_nvpair(snaps, pair)) {
3825		const char *name = nvpair_name(pair);
3826
3827		/*
3828		 * The snap must be in the specified pool to prevent the
3829		 * invalid removal of zvol minors below.
3830		 */
3831		if (strncmp(name, poolname, poollen) != 0 ||
3832		    (name[poollen] != '/' && name[poollen] != '@'))
3833			return (SET_ERROR(EXDEV));
3834
3835		zfs_unmount_snap(nvpair_name(pair));
3836		if (spa_open(name, &spa, FTAG) == 0) {
3837			zvol_remove_minors(spa, name, B_TRUE);
3838			spa_close(spa, FTAG);
3839		}
3840	}
3841
3842	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3843}
3844
3845/*
3846 * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
3847 * All bookmarks and snapshots must be in the same pool.
3848 * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
3849 *
3850 * innvl: {
3851 *     new_bookmark1 -> existing_snapshot,
3852 *     new_bookmark2 -> existing_bookmark,
3853 * }
3854 *
3855 * outnvl: bookmark -> error code (int32)
3856 *
3857 */
3858static const zfs_ioc_key_t zfs_keys_bookmark[] = {
3859	{"<bookmark>...",	DATA_TYPE_STRING,	ZK_WILDCARDLIST},
3860};
3861
3862static int
3863zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3864{
3865	(void) poolname;
3866	return (dsl_bookmark_create(innvl, outnvl));
3867}
3868
3869/*
3870 * innvl: {
3871 *     property 1, property 2, ...
3872 * }
3873 *
3874 * outnvl: {
3875 *     bookmark name 1 -> { property 1, property 2, ... },
3876 *     bookmark name 2 -> { property 1, property 2, ... }
3877 * }
3878 *
3879 */
3880static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
3881	{"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
3882};
3883
3884static int
3885zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3886{
3887	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3888}
3889
3890/*
3891 * innvl is not used.
3892 *
3893 * outnvl: {
3894 *     property 1, property 2, ...
3895 * }
3896 *
3897 */
3898static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
3899	/* no nvl keys */
3900};
3901
3902static int
3903zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
3904    nvlist_t *outnvl)
3905{
3906	(void) innvl;
3907	char fsname[ZFS_MAX_DATASET_NAME_LEN];
3908	char *bmname;
3909
3910	bmname = strchr(bookmark, '#');
3911	if (bmname == NULL)
3912		return (SET_ERROR(EINVAL));
3913	bmname++;
3914
3915	(void) strlcpy(fsname, bookmark, sizeof (fsname));
3916	*(strchr(fsname, '#')) = '\0';
3917
3918	return (dsl_get_bookmark_props(fsname, bmname, outnvl));
3919}
3920
3921/*
3922 * innvl: {
3923 *     bookmark name 1, bookmark name 2
3924 * }
3925 *
3926 * outnvl: bookmark -> error code (int32)
3927 *
3928 */
3929static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
3930	{"<bookmark>...",	DATA_TYPE_BOOLEAN,	ZK_WILDCARDLIST},
3931};
3932
3933static int
3934zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3935    nvlist_t *outnvl)
3936{
3937	int error, poollen;
3938
3939	poollen = strlen(poolname);
3940	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3941	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3942		const char *name = nvpair_name(pair);
3943		const char *cp = strchr(name, '#');
3944
3945		/*
3946		 * The bookmark name must contain an #, and the part after it
3947		 * must contain only valid characters.
3948		 */
3949		if (cp == NULL ||
3950		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3951			return (SET_ERROR(EINVAL));
3952
3953		/*
3954		 * The bookmark must be in the specified pool.
3955		 */
3956		if (strncmp(name, poolname, poollen) != 0 ||
3957		    (name[poollen] != '/' && name[poollen] != '#'))
3958			return (SET_ERROR(EXDEV));
3959	}
3960
3961	error = dsl_bookmark_destroy(innvl, outnvl);
3962	return (error);
3963}
3964
3965static const zfs_ioc_key_t zfs_keys_channel_program[] = {
3966	{"program",	DATA_TYPE_STRING,		0},
3967	{"arg",		DATA_TYPE_ANY,			0},
3968	{"sync",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
3969	{"instrlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
3970	{"memlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
3971};
3972
3973static int
3974zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3975    nvlist_t *outnvl)
3976{
3977	const char *program;
3978	uint64_t instrlimit, memlimit;
3979	boolean_t sync_flag;
3980	nvpair_t *nvarg = NULL;
3981
3982	program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
3983	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3984		sync_flag = B_TRUE;
3985	}
3986	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3987		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3988	}
3989	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3990		memlimit = ZCP_DEFAULT_MEMLIMIT;
3991	}
3992	nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
3993
3994	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3995		return (SET_ERROR(EINVAL));
3996	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3997		return (SET_ERROR(EINVAL));
3998
3999	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
4000	    nvarg, outnvl));
4001}
4002
4003/*
4004 * innvl: unused
4005 * outnvl: empty
4006 */
4007static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
4008	/* no nvl keys */
4009};
4010
4011static int
4012zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4013{
4014	(void) innvl, (void) outnvl;
4015	return (spa_checkpoint(poolname));
4016}
4017
4018/*
4019 * innvl: unused
4020 * outnvl: empty
4021 */
4022static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
4023	/* no nvl keys */
4024};
4025
4026static int
4027zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
4028    nvlist_t *outnvl)
4029{
4030	(void) innvl, (void) outnvl;
4031	return (spa_checkpoint_discard(poolname));
4032}
4033
4034/*
4035 * inputs:
4036 * zc_name		name of dataset to destroy
4037 * zc_defer_destroy	mark for deferred destroy
4038 *
4039 * outputs:		none
4040 */
4041static int
4042zfs_ioc_destroy(zfs_cmd_t *zc)
4043{
4044	objset_t *os;
4045	dmu_objset_type_t ost;
4046	int err;
4047
4048	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4049	if (err != 0)
4050		return (err);
4051	ost = dmu_objset_type(os);
4052	dmu_objset_rele(os, FTAG);
4053
4054	if (ost == DMU_OST_ZFS)
4055		zfs_unmount_snap(zc->zc_name);
4056
4057	if (strchr(zc->zc_name, '@')) {
4058		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
4059	} else {
4060		err = dsl_destroy_head(zc->zc_name);
4061		if (err == EEXIST) {
4062			/*
4063			 * It is possible that the given DS may have
4064			 * hidden child (%recv) datasets - "leftovers"
4065			 * resulting from the previously interrupted
4066			 * 'zfs receive'.
4067			 *
4068			 * 6 extra bytes for /%recv
4069			 */
4070			char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
4071
4072			if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
4073			    zc->zc_name, recv_clone_name) >=
4074			    sizeof (namebuf))
4075				return (SET_ERROR(EINVAL));
4076
4077			/*
4078			 * Try to remove the hidden child (%recv) and after
4079			 * that try to remove the target dataset.
4080			 * If the hidden child (%recv) does not exist
4081			 * the original error (EEXIST) will be returned
4082			 */
4083			err = dsl_destroy_head(namebuf);
4084			if (err == 0)
4085				err = dsl_destroy_head(zc->zc_name);
4086			else if (err == ENOENT)
4087				err = SET_ERROR(EEXIST);
4088		}
4089	}
4090
4091	return (err);
4092}
4093
4094/*
4095 * innvl: {
 *     "initialize_command" ->
 *         POOL_INITIALIZE_{CANCEL|START|SUSPEND|UNINIT} (uint64)
4097 *     "initialize_vdevs": { -> guids to initialize (nvlist)
4098 *         "vdev_path_1": vdev_guid_1, (uint64),
4099 *         "vdev_path_2": vdev_guid_2, (uint64),
4100 *         ...
4101 *     },
4102 * }
4103 *
4104 * outnvl: {
4105 *     "initialize_vdevs": { -> initialization errors (nvlist)
4106 *         "vdev_path_1": errno, see function body for possible errnos (uint64)
4107 *         "vdev_path_2": errno, ... (uint64)
4108 *         ...
4109 *     }
4110 * }
4111 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids has been specified with a type other than uint64.
4114 */
4115static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
4116	{ZPOOL_INITIALIZE_COMMAND,	DATA_TYPE_UINT64,	0},
4117	{ZPOOL_INITIALIZE_VDEVS,	DATA_TYPE_NVLIST,	0}
4118};
4119
4120static int
4121zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4122{
4123	uint64_t cmd_type;
4124	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
4125	    &cmd_type) != 0) {
4126		return (SET_ERROR(EINVAL));
4127	}
4128
4129	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
4130	    cmd_type == POOL_INITIALIZE_START ||
4131	    cmd_type == POOL_INITIALIZE_SUSPEND ||
4132	    cmd_type == POOL_INITIALIZE_UNINIT)) {
4133		return (SET_ERROR(EINVAL));
4134	}
4135
4136	nvlist_t *vdev_guids;
4137	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
4138	    &vdev_guids) != 0) {
4139		return (SET_ERROR(EINVAL));
4140	}
4141
4142	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4143	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4144		uint64_t vdev_guid;
4145		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4146			return (SET_ERROR(EINVAL));
4147		}
4148	}
4149
4150	spa_t *spa;
4151	int error = spa_open(poolname, &spa, FTAG);
4152	if (error != 0)
4153		return (error);
4154
4155	nvlist_t *vdev_errlist = fnvlist_alloc();
4156	int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
4157	    vdev_errlist);
4158
4159	if (fnvlist_size(vdev_errlist) > 0) {
4160		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
4161		    vdev_errlist);
4162	}
4163	fnvlist_free(vdev_errlist);
4164
4165	spa_close(spa, FTAG);
4166	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4167}
4168
4169/*
4170 * innvl: {
4171 *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
4172 *     "trim_vdevs": { -> guids to TRIM (nvlist)
4173 *         "vdev_path_1": vdev_guid_1, (uint64),
4174 *         "vdev_path_2": vdev_guid_2, (uint64),
4175 *         ...
4176 *     },
4177 *     "trim_rate" -> Target TRIM rate in bytes/sec.
4178 *     "trim_secure" -> Set to request a secure TRIM.
4179 * }
4180 *
4181 * outnvl: {
4182 *     "trim_vdevs": { -> TRIM errors (nvlist)
4183 *         "vdev_path_1": errno, see function body for possible errnos (uint64)
4184 *         "vdev_path_2": errno, ... (uint64)
4185 *         ...
4186 *     }
4187 * }
4188 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids has been specified with a type other than uint64.
4191 */
4192static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
4193	{ZPOOL_TRIM_COMMAND,	DATA_TYPE_UINT64,		0},
4194	{ZPOOL_TRIM_VDEVS,	DATA_TYPE_NVLIST,		0},
4195	{ZPOOL_TRIM_RATE,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4196	{ZPOOL_TRIM_SECURE,	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
4197};
4198
4199static int
4200zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4201{
4202	uint64_t cmd_type;
4203	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4204		return (SET_ERROR(EINVAL));
4205
4206	if (!(cmd_type == POOL_TRIM_CANCEL ||
4207	    cmd_type == POOL_TRIM_START ||
4208	    cmd_type == POOL_TRIM_SUSPEND)) {
4209		return (SET_ERROR(EINVAL));
4210	}
4211
4212	nvlist_t *vdev_guids;
4213	if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4214		return (SET_ERROR(EINVAL));
4215
4216	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4217	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4218		uint64_t vdev_guid;
4219		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4220			return (SET_ERROR(EINVAL));
4221		}
4222	}
4223
4224	/* Optional, defaults to maximum rate when not provided */
4225	uint64_t rate;
4226	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4227		rate = 0;
4228
4229	/* Optional, defaults to standard TRIM when not provided */
4230	boolean_t secure;
4231	if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4232	    &secure) != 0) {
4233		secure = B_FALSE;
4234	}
4235
4236	spa_t *spa;
4237	int error = spa_open(poolname, &spa, FTAG);
4238	if (error != 0)
4239		return (error);
4240
4241	nvlist_t *vdev_errlist = fnvlist_alloc();
4242	int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4243	    rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4244
4245	if (fnvlist_size(vdev_errlist) > 0)
4246		fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4247
4248	fnvlist_free(vdev_errlist);
4249
4250	spa_close(spa, FTAG);
4251	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4252}
4253
4254/*
4255 * This ioctl waits for activity of a particular type to complete. If there is
4256 * no activity of that type in progress, it returns immediately, and the
4257 * returned value "waited" is false. If there is activity in progress, and no
4258 * tag is passed in, the ioctl blocks until all activity of that type is
4259 * complete, and then returns with "waited" set to true.
4260 *
4261 * If a tag is provided, it identifies a particular instance of an activity to
4262 * wait for. Currently, this is only valid for use with 'initialize', because
4263 * that is the only activity for which there can be multiple instances running
4264 * concurrently. In the case of 'initialize', the tag corresponds to the guid of
4265 * the vdev on which to wait.
4266 *
4267 * If a thread waiting in the ioctl receives a signal, the call will return
4268 * immediately, and the return value will be EINTR.
4269 *
4270 * innvl: {
4271 *     "wait_activity" -> int32_t
4272 *     (optional) "wait_tag" -> uint64_t
4273 * }
4274 *
4275 * outnvl: "waited" -> boolean_t
4276 */
4277static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
4278	{ZPOOL_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
4279	{ZPOOL_WAIT_TAG,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4280};
4281
4282static int
4283zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4284{
4285	int32_t activity;
4286	uint64_t tag;
4287	boolean_t waited;
4288	int error;
4289
4290	if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
4291		return (EINVAL);
4292
4293	if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
4294		error = spa_wait_tag(name, activity, tag, &waited);
4295	else
4296		error = spa_wait(name, activity, &waited);
4297
4298	if (error == 0)
4299		fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
4300
4301	return (error);
4302}
4303
4304/*
 * This ioctl waits for activity of a particular type to complete. If there is
 * no activity of that type in progress, it returns immediately, and the
 * returned value "waited" is false. If there is activity in progress, the
 * ioctl blocks until all activity of that type is complete, and then returns
 * with "waited" set to true.
4310 *
4311 * If a thread waiting in the ioctl receives a signal, the call will return
4312 * immediately, and the return value will be EINTR.
4313 *
4314 * innvl: {
4315 *     "wait_activity" -> int32_t
4316 * }
4317 *
4318 * outnvl: "waited" -> boolean_t
4319 */
static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
	{ZFS_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
};

/*
 * Wait for the given activity on the dataset 'name' via dsl_dir_wait().
 * On success the "waited" flag it reports is stored in outnvl under
 * ZFS_WAIT_WAITED.
 *
 * Returns EINVAL if ZFS_WAIT_ACTIVITY is missing, is not an int32, or lies
 * outside [0, ZFS_WAIT_NUM_ACTIVITIES); otherwise the error from holding
 * the pool or dataset, or the result of dsl_dir_wait().
 */
static int
zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
{
	int32_t activity;
	boolean_t waited = B_FALSE;
	int error;
	dsl_pool_t *dp;
	dsl_dir_t *dd;
	dsl_dataset_t *ds;

	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
		return (SET_ERROR(EINVAL));

	if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
		return (SET_ERROR(EINVAL));

	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
		return (error);

	if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}

	/*
	 * Register as a waiter.  dd_activity_lock is taken here and is not
	 * released by this function until after dsl_dir_wait() has returned.
	 */
	dd = ds->ds_dir;
	mutex_enter(&dd->dd_activity_lock);
	dd->dd_activity_waiters++;

	/*
	 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
	 * aren't evicted while we're waiting. Normally this is prevented by
	 * holding the pool, but we can't do that while we're waiting since
	 * that would prevent TXGs from syncing out. Some of the functionality
	 * of long-holds (e.g. preventing deletion) is unnecessary for this
	 * case, since we would cancel the waiters before proceeding with a
	 * deletion. An alternative mechanism for keeping the dataset around
	 * could be developed but this is simpler.
	 */
	dsl_dataset_long_hold(ds, FTAG);
	dsl_pool_rele(dp, FTAG);

	error = dsl_dir_wait(dd, ds, activity, &waited);

	/*
	 * Deregister; when the last waiter leaves, signal dd_activity_cv.
	 */
	dsl_dataset_long_rele(ds, FTAG);
	dd->dd_activity_waiters--;
	if (dd->dd_activity_waiters == 0)
		cv_signal(&dd->dd_activity_cv);
	mutex_exit(&dd->dd_activity_lock);

	dsl_dataset_rele(ds, FTAG);

	if (error == 0)
		fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);

	return (error);
}
4380
4381/*
4382 * fsname is name of dataset to rollback (to most recent snapshot)
4383 *
4384 * innvl may contain name of expected target snapshot
4385 *
 * outnvl: {
 *     "target" -> name of most recent snapshot
 * }
4388 */
4389static const zfs_ioc_key_t zfs_keys_rollback[] = {
4390	{"target",	DATA_TYPE_STRING,	ZK_OPTIONAL},
4391};
4392
4393static int
4394zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4395{
4396	zfsvfs_t *zfsvfs;
4397	zvol_state_handle_t *zv;
4398	const char *target = NULL;
4399	int error;
4400
4401	(void) nvlist_lookup_string(innvl, "target", &target);
4402	if (target != NULL) {
4403		const char *cp = strchr(target, '@');
4404
4405		/*
4406		 * The snap name must contain an @, and the part after it must
4407		 * contain only valid characters.
4408		 */
4409		if (cp == NULL ||
4410		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4411			return (SET_ERROR(EINVAL));
4412	}
4413
4414	if (getzfsvfs(fsname, &zfsvfs) == 0) {
4415		dsl_dataset_t *ds;
4416
4417		ds = dmu_objset_ds(zfsvfs->z_os);
4418		error = zfs_suspend_fs(zfsvfs);
4419		if (error == 0) {
4420			int resume_err;
4421
4422			error = dsl_dataset_rollback(fsname, target, zfsvfs,
4423			    outnvl);
4424			resume_err = zfs_resume_fs(zfsvfs, ds);
4425			error = error ? error : resume_err;
4426		}
4427		zfs_vfs_rele(zfsvfs);
4428	} else if ((zv = zvol_suspend(fsname)) != NULL) {
4429		error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
4430		    outnvl);
4431		zvol_resume(zv);
4432	} else {
4433		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4434	}
4435	return (error);
4436}
4437
/*
 * Callback taking a filesystem name and, via arg, a snapshot short name.
 * Builds "<fsname>@<snapname>", passes it to zfs_unmount_snap() and frees
 * the temporary string.  Always returns 0.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	char *snap = kmem_asprintf("%s@%s", fsname, (const char *)arg);

	zfs_unmount_snap(snap);
	kmem_strfree(snap);

	return (0);
}
4450
4451/*
4452 *
4453 * snapname is the snapshot to redact.
4454 * innvl: {
4455 *     "bookname" -> (string)
4456 *         shortname of the redaction bookmark to generate
4457 *     "snapnv" -> (nvlist, values ignored)
4458 *         snapshots to redact snapname with respect to
4459 * }
4460 *
4461 * outnvl is unused
4462 */
4463
/*
 * Input keys for the redact ioctl: key name, expected nvpair data type, and
 * flags.  Both entries carry flags of 0 (i.e. not ZK_OPTIONAL), and both are
 * looked up by zfs_ioc_redact().
 */
static const zfs_ioc_key_t zfs_keys_redact[] = {
	{"bookname",		DATA_TYPE_STRING,	0},
	{"snapnv",		DATA_TYPE_NVLIST,	0},
};
4468
4469static int
4470zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
4471{
4472	(void) outnvl;
4473	nvlist_t *redactnvl = NULL;
4474	const char *redactbook = NULL;
4475
4476	if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
4477		return (SET_ERROR(EINVAL));
4478	if (fnvlist_num_pairs(redactnvl) == 0)
4479		return (SET_ERROR(ENXIO));
4480	if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
4481		return (SET_ERROR(EINVAL));
4482
4483	return (dmu_redact_snap(snapname, redactnvl, redactbook));
4484}
4485
4486/*
4487 * inputs:
4488 * zc_name	old name of dataset
4489 * zc_value	new name of dataset
4490 * zc_cookie	recursive flag (only valid for snapshots)
4491 *
4492 * outputs:	none
4493 */
4494static int
4495zfs_ioc_rename(zfs_cmd_t *zc)
4496{
4497	objset_t *os;
4498	dmu_objset_type_t ost;
4499	boolean_t recursive = zc->zc_cookie & 1;
4500	boolean_t nounmount = !!(zc->zc_cookie & 2);
4501	char *at;
4502	int err;
4503
4504	/* "zfs rename" from and to ...%recv datasets should both fail */
4505	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4506	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4507	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4508	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4509	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4510		return (SET_ERROR(EINVAL));
4511
4512	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4513	if (err != 0)
4514		return (err);
4515	ost = dmu_objset_type(os);
4516	dmu_objset_rele(os, FTAG);
4517
4518	at = strchr(zc->zc_name, '@');
4519	if (at != NULL) {
4520		/* snaps must be in same fs */
4521		int error;
4522
4523		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4524			return (SET_ERROR(EXDEV));
4525		*at = '\0';
4526		if (ost == DMU_OST_ZFS && !nounmount) {
4527			error = dmu_objset_find(zc->zc_name,
4528			    recursive_unmount, at + 1,
4529			    recursive ? DS_FIND_CHILDREN : 0);
4530			if (error != 0) {
4531				*at = '@';
4532				return (error);
4533			}
4534		}
4535		error = dsl_dataset_rename_snapshot(zc->zc_name,
4536		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4537		*at = '@';
4538
4539		return (error);
4540	} else {
4541		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4542	}
4543}
4544
4545static int
4546zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4547{
4548	const char *propname = nvpair_name(pair);
4549	boolean_t issnap = (strchr(dsname, '@') != NULL);
4550	zfs_prop_t prop = zfs_name_to_prop(propname);
4551	uint64_t intval, compval;
4552	int err;
4553
4554	if (prop == ZPROP_USERPROP) {
4555		if (zfs_prop_user(propname)) {
4556			if ((err = zfs_secpolicy_write_perms(dsname,
4557			    ZFS_DELEG_PERM_USERPROP, cr)))
4558				return (err);
4559			return (0);
4560		}
4561
4562		if (!issnap && zfs_prop_userquota(propname)) {
4563			const char *perm = NULL;
4564			const char *uq_prefix =
4565			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4566			const char *gq_prefix =
4567			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4568			const char *uiq_prefix =
4569			    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4570			const char *giq_prefix =
4571			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4572			const char *pq_prefix =
4573			    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4574			const char *piq_prefix = zfs_userquota_prop_prefixes[\
4575			    ZFS_PROP_PROJECTOBJQUOTA];
4576
4577			if (strncmp(propname, uq_prefix,
4578			    strlen(uq_prefix)) == 0) {
4579				perm = ZFS_DELEG_PERM_USERQUOTA;
4580			} else if (strncmp(propname, uiq_prefix,
4581			    strlen(uiq_prefix)) == 0) {
4582				perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4583			} else if (strncmp(propname, gq_prefix,
4584			    strlen(gq_prefix)) == 0) {
4585				perm = ZFS_DELEG_PERM_GROUPQUOTA;
4586			} else if (strncmp(propname, giq_prefix,
4587			    strlen(giq_prefix)) == 0) {
4588				perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4589			} else if (strncmp(propname, pq_prefix,
4590			    strlen(pq_prefix)) == 0) {
4591				perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4592			} else if (strncmp(propname, piq_prefix,
4593			    strlen(piq_prefix)) == 0) {
4594				perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4595			} else {
4596				/* {USER|GROUP|PROJECT}USED are read-only */
4597				return (SET_ERROR(EINVAL));
4598			}
4599
4600			if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
4601				return (err);
4602			return (0);
4603		}
4604
4605		return (SET_ERROR(EINVAL));
4606	}
4607
4608	if (issnap)
4609		return (SET_ERROR(EINVAL));
4610
4611	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4612		/*
4613		 * dsl_prop_get_all_impl() returns properties in this
4614		 * format.
4615		 */
4616		nvlist_t *attrs;
4617		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4618		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4619		    &pair) == 0);
4620	}
4621
4622	/*
4623	 * Check that this value is valid for this pool version
4624	 */
4625	switch (prop) {
4626	case ZFS_PROP_COMPRESSION:
4627		/*
4628		 * If the user specified gzip compression, make sure
4629		 * the SPA supports it. We ignore any errors here since
4630		 * we'll catch them later.
4631		 */
4632		if (nvpair_value_uint64(pair, &intval) == 0) {
4633			compval = ZIO_COMPRESS_ALGO(intval);
4634			if (compval >= ZIO_COMPRESS_GZIP_1 &&
4635			    compval <= ZIO_COMPRESS_GZIP_9 &&
4636			    zfs_earlier_version(dsname,
4637			    SPA_VERSION_GZIP_COMPRESSION)) {
4638				return (SET_ERROR(ENOTSUP));
4639			}
4640
4641			if (compval == ZIO_COMPRESS_ZLE &&
4642			    zfs_earlier_version(dsname,
4643			    SPA_VERSION_ZLE_COMPRESSION))
4644				return (SET_ERROR(ENOTSUP));
4645
4646			if (compval == ZIO_COMPRESS_LZ4) {
4647				spa_t *spa;
4648
4649				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4650					return (err);
4651
4652				if (!spa_feature_is_enabled(spa,
4653				    SPA_FEATURE_LZ4_COMPRESS)) {
4654					spa_close(spa, FTAG);
4655					return (SET_ERROR(ENOTSUP));
4656				}
4657				spa_close(spa, FTAG);
4658			}
4659
4660			if (compval == ZIO_COMPRESS_ZSTD) {
4661				spa_t *spa;
4662
4663				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4664					return (err);
4665
4666				if (!spa_feature_is_enabled(spa,
4667				    SPA_FEATURE_ZSTD_COMPRESS)) {
4668					spa_close(spa, FTAG);
4669					return (SET_ERROR(ENOTSUP));
4670				}
4671				spa_close(spa, FTAG);
4672			}
4673		}
4674		break;
4675
4676	case ZFS_PROP_COPIES:
4677		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4678			return (SET_ERROR(ENOTSUP));
4679		break;
4680
4681	case ZFS_PROP_VOLBLOCKSIZE:
4682	case ZFS_PROP_RECORDSIZE:
4683		/* Record sizes above 128k need the feature to be enabled */
4684		if (nvpair_value_uint64(pair, &intval) == 0 &&
4685		    intval > SPA_OLD_MAXBLOCKSIZE) {
4686			spa_t *spa;
4687
4688			/*
4689			 * We don't allow setting the property above 1MB,
4690			 * unless the tunable has been changed.
4691			 */
4692			if (intval > zfs_max_recordsize ||
4693			    intval > SPA_MAXBLOCKSIZE)
4694				return (SET_ERROR(ERANGE));
4695
4696			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4697				return (err);
4698
4699			if (!spa_feature_is_enabled(spa,
4700			    SPA_FEATURE_LARGE_BLOCKS)) {
4701				spa_close(spa, FTAG);
4702				return (SET_ERROR(ENOTSUP));
4703			}
4704			spa_close(spa, FTAG);
4705		}
4706		break;
4707
4708	case ZFS_PROP_DNODESIZE:
4709		/* Dnode sizes above 512 need the feature to be enabled */
4710		if (nvpair_value_uint64(pair, &intval) == 0 &&
4711		    intval != ZFS_DNSIZE_LEGACY) {
4712			spa_t *spa;
4713
4714			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4715				return (err);
4716
4717			if (!spa_feature_is_enabled(spa,
4718			    SPA_FEATURE_LARGE_DNODE)) {
4719				spa_close(spa, FTAG);
4720				return (SET_ERROR(ENOTSUP));
4721			}
4722			spa_close(spa, FTAG);
4723		}
4724		break;
4725
4726	case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4727		/*
4728		 * This property could require the allocation classes
4729		 * feature to be active for setting, however we allow
4730		 * it so that tests of settable properties succeed.
4731		 * The CLI will issue a warning in this case.
4732		 */
4733		break;
4734
4735	case ZFS_PROP_SHARESMB:
4736		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4737			return (SET_ERROR(ENOTSUP));
4738		break;
4739
4740	case ZFS_PROP_ACLINHERIT:
4741		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4742		    nvpair_value_uint64(pair, &intval) == 0) {
4743			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4744			    zfs_earlier_version(dsname,
4745			    SPA_VERSION_PASSTHROUGH_X))
4746				return (SET_ERROR(ENOTSUP));
4747		}
4748		break;
4749	case ZFS_PROP_CHECKSUM:
4750	case ZFS_PROP_DEDUP:
4751	{
4752		spa_feature_t feature;
4753		spa_t *spa;
4754		int err;
4755
4756		/* dedup feature version checks */
4757		if (prop == ZFS_PROP_DEDUP &&
4758		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4759			return (SET_ERROR(ENOTSUP));
4760
4761		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4762		    nvpair_value_uint64(pair, &intval) == 0) {
4763			/* check prop value is enabled in features */
4764			feature = zio_checksum_to_feature(
4765			    intval & ZIO_CHECKSUM_MASK);
4766			if (feature == SPA_FEATURE_NONE)
4767				break;
4768
4769			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4770				return (err);
4771
4772			if (!spa_feature_is_enabled(spa, feature)) {
4773				spa_close(spa, FTAG);
4774				return (SET_ERROR(ENOTSUP));
4775			}
4776			spa_close(spa, FTAG);
4777		}
4778		break;
4779	}
4780
4781	default:
4782		break;
4783	}
4784
4785	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4786}
4787
4788/*
4789 * Removes properties from the given props list that fail permission checks
4790 * needed to clear them and to restore them in case of a receive error. For each
4791 * property, make sure we have both set and inherit permissions.
4792 *
4793 * Returns the first error encountered if any permission checks fail. If the
4794 * caller provides a non-NULL errlist, it also gives the complete list of names
4795 * of all the properties that failed a permission check along with the
4796 * corresponding error numbers. The caller is responsible for freeing the
4797 * returned errlist.
4798 *
4799 * If every property checks out successfully, zero is returned and the list
4800 * pointed at by errlist is NULL.
4801 */
4802static int
4803zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
4804{
4805	zfs_cmd_t *zc;
4806	nvpair_t *pair, *next_pair;
4807	nvlist_t *errors;
4808	int err, rv = 0;
4809
4810	if (props == NULL)
4811		return (0);
4812
4813	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4814
4815	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4816	(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
4817	pair = nvlist_next_nvpair(props, NULL);
4818	while (pair != NULL) {
4819		next_pair = nvlist_next_nvpair(props, pair);
4820
4821		(void) strlcpy(zc->zc_value, nvpair_name(pair),
4822		    sizeof (zc->zc_value));
4823		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4824		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4825			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4826			VERIFY(nvlist_add_int32(errors,
4827			    zc->zc_value, err) == 0);
4828		}
4829		pair = next_pair;
4830	}
4831	kmem_free(zc, sizeof (zfs_cmd_t));
4832
4833	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4834		nvlist_free(errors);
4835		errors = NULL;
4836	} else {
4837		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4838	}
4839
4840	if (errlist == NULL)
4841		nvlist_free(errors);
4842	else
4843		*errlist = errors;
4844
4845	return (rv);
4846}
4847
4848static boolean_t
4849propval_equals(nvpair_t *p1, nvpair_t *p2)
4850{
4851	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4852		/* dsl_prop_get_all_impl() format */
4853		nvlist_t *attrs;
4854		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4855		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4856		    &p1) == 0);
4857	}
4858
4859	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4860		nvlist_t *attrs;
4861		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4862		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4863		    &p2) == 0);
4864	}
4865
4866	if (nvpair_type(p1) != nvpair_type(p2))
4867		return (B_FALSE);
4868
4869	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4870		const char *valstr1, *valstr2;
4871
4872		VERIFY(nvpair_value_string(p1, &valstr1) == 0);
4873		VERIFY(nvpair_value_string(p2, &valstr2) == 0);
4874		return (strcmp(valstr1, valstr2) == 0);
4875	} else {
4876		uint64_t intval1, intval2;
4877
4878		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4879		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4880		return (intval1 == intval2);
4881	}
4882}
4883
4884/*
4885 * Remove properties from props if they are not going to change (as determined
4886 * by comparison with origprops). Remove them from origprops as well, since we
4887 * do not need to clear or restore properties that won't change.
4888 */
4889static void
4890props_reduce(nvlist_t *props, nvlist_t *origprops)
4891{
4892	nvpair_t *pair, *next_pair;
4893
4894	if (origprops == NULL)
4895		return; /* all props need to be received */
4896
4897	pair = nvlist_next_nvpair(props, NULL);
4898	while (pair != NULL) {
4899		const char *propname = nvpair_name(pair);
4900		nvpair_t *match;
4901
4902		next_pair = nvlist_next_nvpair(props, pair);
4903
4904		if ((nvlist_lookup_nvpair(origprops, propname,
4905		    &match) != 0) || !propval_equals(pair, match))
4906			goto next; /* need to set received value */
4907
4908		/* don't clear the existing received value */
4909		(void) nvlist_remove_nvpair(origprops, match);
4910		/* don't bother receiving the property */
4911		(void) nvlist_remove_nvpair(props, pair);
4912next:
4913		pair = next_pair;
4914	}
4915}
4916
4917/*
4918 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4919 * For example, refquota cannot be set until after the receipt of a dataset,
4920 * because in replication streams, an older/earlier snapshot may exceed the
4921 * refquota.  We want to receive the older/earlier snapshot, but setting
4922 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4923 * the older/earlier snapshot from being received (with EDQUOT).
4924 *
4925 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4926 *
4927 * libzfs will need to be judicious handling errors encountered by props
4928 * extracted by this function.
4929 */
4930static nvlist_t *
4931extract_delay_props(nvlist_t *props)
4932{
4933	nvlist_t *delayprops;
4934	nvpair_t *nvp, *tmp;
4935	static const zfs_prop_t delayable[] = {
4936		ZFS_PROP_REFQUOTA,
4937		ZFS_PROP_KEYLOCATION,
4938		/*
4939		 * Setting ZFS_PROP_SHARESMB requires the objset type to be
4940		 * known, which is not possible prior to receipt of raw sends.
4941		 */
4942		ZFS_PROP_SHARESMB,
4943		0
4944	};
4945	int i;
4946
4947	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4948
4949	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4950	    nvp = nvlist_next_nvpair(props, nvp)) {
4951		/*
4952		 * strcmp() is safe because zfs_prop_to_name() always returns
4953		 * a bounded string.
4954		 */
4955		for (i = 0; delayable[i] != 0; i++) {
4956			if (strcmp(zfs_prop_to_name(delayable[i]),
4957			    nvpair_name(nvp)) == 0) {
4958				break;
4959			}
4960		}
4961		if (delayable[i] != 0) {
4962			tmp = nvlist_prev_nvpair(props, nvp);
4963			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4964			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4965			nvp = tmp;
4966		}
4967	}
4968
4969	if (nvlist_empty(delayprops)) {
4970		nvlist_free(delayprops);
4971		delayprops = NULL;
4972	}
4973	return (delayprops);
4974}
4975
4976static void
4977zfs_allow_log_destroy(void *arg)
4978{
4979	char *poolname = arg;
4980
4981	if (poolname != NULL)
4982		kmem_strfree(poolname);
4983}
4984
#ifdef	ZFS_DEBUG
/*
 * Debug-only error injection switch.  When it is set, zfs_ioc_recv_impl()
 * clears it and forces its own result to an error after the stream has been
 * processed, which exercises the property restore path.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4988
4989/*
4990 * nvlist 'errors' is always allocated. It will contain descriptions of
4991 * encountered errors, if any. It's the callers responsibility to free.
4992 */
4993static int
4994zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
4995    nvlist_t *recvprops, nvlist_t *localprops, nvlist_t *hidden_args,
4996    boolean_t force, boolean_t heal, boolean_t resumable, int input_fd,
4997    dmu_replay_record_t *begin_record, uint64_t *read_bytes,
4998    uint64_t *errflags, nvlist_t **errors)
4999{
5000	dmu_recv_cookie_t drc;
5001	int error = 0;
5002	int props_error = 0;
5003	offset_t off, noff;
5004	nvlist_t *local_delayprops = NULL;
5005	nvlist_t *recv_delayprops = NULL;
5006	nvlist_t *inherited_delayprops = NULL;
5007	nvlist_t *origprops = NULL; /* existing properties */
5008	nvlist_t *origrecvd = NULL; /* existing received properties */
5009	boolean_t first_recvd_props = B_FALSE;
5010	boolean_t tofs_was_redacted;
5011	zfs_file_t *input_fp;
5012
5013	*read_bytes = 0;
5014	*errflags = 0;
5015	*errors = fnvlist_alloc();
5016	off = 0;
5017
5018	if ((input_fp = zfs_file_get(input_fd)) == NULL)
5019		return (SET_ERROR(EBADF));
5020
5021	noff = off = zfs_file_off(input_fp);
5022	error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal,
5023	    resumable, localprops, hidden_args, origin, &drc, input_fp,
5024	    &off);
5025	if (error != 0)
5026		goto out;
5027	tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
5028
5029	/*
5030	 * Set properties before we receive the stream so that they are applied
5031	 * to the new data. Note that we must call dmu_recv_stream() if
5032	 * dmu_recv_begin() succeeds.
5033	 */
5034	if (recvprops != NULL && !drc.drc_newfs) {
5035		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
5036		    SPA_VERSION_RECVD_PROPS &&
5037		    !dsl_prop_get_hasrecvd(tofs))
5038			first_recvd_props = B_TRUE;
5039
5040		/*
5041		 * If new received properties are supplied, they are to
5042		 * completely replace the existing received properties,
5043		 * so stash away the existing ones.
5044		 */
5045		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
5046			nvlist_t *errlist = NULL;
5047			/*
5048			 * Don't bother writing a property if its value won't
5049			 * change (and avoid the unnecessary security checks).
5050			 *
5051			 * The first receive after SPA_VERSION_RECVD_PROPS is a
5052			 * special case where we blow away all local properties
5053			 * regardless.
5054			 */
5055			if (!first_recvd_props)
5056				props_reduce(recvprops, origrecvd);
5057			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
5058				(void) nvlist_merge(*errors, errlist, 0);
5059			nvlist_free(errlist);
5060
5061			if (clear_received_props(tofs, origrecvd,
5062			    first_recvd_props ? NULL : recvprops) != 0)
5063				*errflags |= ZPROP_ERR_NOCLEAR;
5064		} else {
5065			*errflags |= ZPROP_ERR_NOCLEAR;
5066		}
5067	}
5068
5069	/*
5070	 * Stash away existing properties so we can restore them on error unless
5071	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
5072	 * case "origrecvd" will take care of that.
5073	 */
5074	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
5075		objset_t *os;
5076		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
5077			if (dsl_prop_get_all(os, &origprops) != 0) {
5078				*errflags |= ZPROP_ERR_NOCLEAR;
5079			}
5080			dmu_objset_rele(os, FTAG);
5081		} else {
5082			*errflags |= ZPROP_ERR_NOCLEAR;
5083		}
5084	}
5085
5086	if (recvprops != NULL) {
5087		props_error = dsl_prop_set_hasrecvd(tofs);
5088
5089		if (props_error == 0) {
5090			recv_delayprops = extract_delay_props(recvprops);
5091			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5092			    recvprops, *errors);
5093		}
5094	}
5095
5096	if (localprops != NULL) {
5097		nvlist_t *oprops = fnvlist_alloc();
5098		nvlist_t *xprops = fnvlist_alloc();
5099		nvpair_t *nvp = NULL;
5100
5101		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5102			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
5103				/* -x property */
5104				const char *name = nvpair_name(nvp);
5105				zfs_prop_t prop = zfs_name_to_prop(name);
5106				if (prop != ZPROP_USERPROP) {
5107					if (!zfs_prop_inheritable(prop))
5108						continue;
5109				} else if (!zfs_prop_user(name))
5110					continue;
5111				fnvlist_add_boolean(xprops, name);
5112			} else {
5113				/* -o property=value */
5114				fnvlist_add_nvpair(oprops, nvp);
5115			}
5116		}
5117
5118		local_delayprops = extract_delay_props(oprops);
5119		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5120		    oprops, *errors);
5121		inherited_delayprops = extract_delay_props(xprops);
5122		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5123		    xprops, *errors);
5124
5125		nvlist_free(oprops);
5126		nvlist_free(xprops);
5127	}
5128
5129	error = dmu_recv_stream(&drc, &off);
5130
5131	if (error == 0) {
5132		zfsvfs_t *zfsvfs = NULL;
5133		zvol_state_handle_t *zv = NULL;
5134
5135		if (getzfsvfs(tofs, &zfsvfs) == 0) {
5136			/* online recv */
5137			dsl_dataset_t *ds;
5138			int end_err;
5139			boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
5140			    begin_record->drr_u.drr_begin.
5141			    drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
5142
5143			ds = dmu_objset_ds(zfsvfs->z_os);
5144			error = zfs_suspend_fs(zfsvfs);
5145			/*
5146			 * If the suspend fails, then the recv_end will
5147			 * likely also fail, and clean up after itself.
5148			 */
5149			end_err = dmu_recv_end(&drc, zfsvfs);
5150			/*
5151			 * If the dataset was not redacted, but we received a
5152			 * redacted stream onto it, we need to unmount the
5153			 * dataset.  Otherwise, resume the filesystem.
5154			 */
5155			if (error == 0 && !drc.drc_newfs &&
5156			    stream_is_redacted && !tofs_was_redacted) {
5157				error = zfs_end_fs(zfsvfs, ds);
5158			} else if (error == 0) {
5159				error = zfs_resume_fs(zfsvfs, ds);
5160			}
5161			error = error ? error : end_err;
5162			zfs_vfs_rele(zfsvfs);
5163		} else if ((zv = zvol_suspend(tofs)) != NULL) {
5164			error = dmu_recv_end(&drc, zvol_tag(zv));
5165			zvol_resume(zv);
5166		} else {
5167			error = dmu_recv_end(&drc, NULL);
5168		}
5169
5170		/* Set delayed properties now, after we're done receiving. */
5171		if (recv_delayprops != NULL && error == 0) {
5172			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5173			    recv_delayprops, *errors);
5174		}
5175		if (local_delayprops != NULL && error == 0) {
5176			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5177			    local_delayprops, *errors);
5178		}
5179		if (inherited_delayprops != NULL && error == 0) {
5180			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5181			    inherited_delayprops, *errors);
5182		}
5183	}
5184
5185	/*
5186	 * Merge delayed props back in with initial props, in case
5187	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
5188	 * we have to make sure clear_received_props() includes
5189	 * the delayed properties).
5190	 *
5191	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
5192	 * using ASSERT() will be just like a VERIFY.
5193	 */
5194	if (recv_delayprops != NULL) {
5195		ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
5196		nvlist_free(recv_delayprops);
5197	}
5198	if (local_delayprops != NULL) {
5199		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
5200		nvlist_free(local_delayprops);
5201	}
5202	if (inherited_delayprops != NULL) {
5203		ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
5204		nvlist_free(inherited_delayprops);
5205	}
5206	*read_bytes = off - noff;
5207
5208#ifdef	ZFS_DEBUG
5209	if (zfs_ioc_recv_inject_err) {
5210		zfs_ioc_recv_inject_err = B_FALSE;
5211		error = 1;
5212	}
5213#endif
5214
5215	/*
5216	 * On error, restore the original props.
5217	 */
5218	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
5219		if (clear_received_props(tofs, recvprops, NULL) != 0) {
5220			/*
5221			 * We failed to clear the received properties.
5222			 * Since we may have left a $recvd value on the
5223			 * system, we can't clear the $hasrecvd flag.
5224			 */
5225			*errflags |= ZPROP_ERR_NORESTORE;
5226		} else if (first_recvd_props) {
5227			dsl_prop_unset_hasrecvd(tofs);
5228		}
5229
5230		if (origrecvd == NULL && !drc.drc_newfs) {
5231			/* We failed to stash the original properties. */
5232			*errflags |= ZPROP_ERR_NORESTORE;
5233		}
5234
5235		/*
5236		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
5237		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
5238		 * explicitly if we're restoring local properties cleared in the
5239		 * first new-style receive.
5240		 */
5241		if (origrecvd != NULL &&
5242		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
5243		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
5244		    origrecvd, NULL) != 0) {
5245			/*
5246			 * We stashed the original properties but failed to
5247			 * restore them.
5248			 */
5249			*errflags |= ZPROP_ERR_NORESTORE;
5250		}
5251	}
5252	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
5253	    !first_recvd_props) {
5254		nvlist_t *setprops;
5255		nvlist_t *inheritprops;
5256		nvpair_t *nvp;
5257
5258		if (origprops == NULL) {
5259			/* We failed to stash the original properties. */
5260			*errflags |= ZPROP_ERR_NORESTORE;
5261			goto out;
5262		}
5263
5264		/* Restore original props */
5265		setprops = fnvlist_alloc();
5266		inheritprops = fnvlist_alloc();
5267		nvp = NULL;
5268		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5269			const char *name = nvpair_name(nvp);
5270			const char *source;
5271			nvlist_t *attrs;
5272
5273			if (!nvlist_exists(origprops, name)) {
5274				/*
5275				 * Property was not present or was explicitly
5276				 * inherited before the receive, restore this.
5277				 */
5278				fnvlist_add_boolean(inheritprops, name);
5279				continue;
5280			}
5281			attrs = fnvlist_lookup_nvlist(origprops, name);
5282			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
5283
5284			/* Skip received properties */
5285			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
5286				continue;
5287
5288			if (strcmp(source, tofs) == 0) {
5289				/* Property was locally set */
5290				fnvlist_add_nvlist(setprops, name, attrs);
5291			} else {
5292				/* Property was implicitly inherited */
5293				fnvlist_add_boolean(inheritprops, name);
5294			}
5295		}
5296
5297		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
5298		    NULL) != 0)
5299			*errflags |= ZPROP_ERR_NORESTORE;
5300		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
5301		    NULL) != 0)
5302			*errflags |= ZPROP_ERR_NORESTORE;
5303
5304		nvlist_free(setprops);
5305		nvlist_free(inheritprops);
5306	}
5307out:
5308	zfs_file_put(input_fp);
5309	nvlist_free(origrecvd);
5310	nvlist_free(origprops);
5311
5312	if (error == 0)
5313		error = props_error;
5314
5315	return (error);
5316}
5317
5318/*
5319 * inputs:
5320 * zc_name		name of containing filesystem (unused)
5321 * zc_nvlist_src{_size}	nvlist of properties to apply
5322 * zc_nvlist_conf{_size}	nvlist of properties to exclude
5323 *			(DATA_TYPE_BOOLEAN) and override (everything else)
5324 * zc_value		name of snapshot to create
5325 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
5326 * zc_cookie		file descriptor to recv from
5327 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
5328 * zc_guid		force flag
5329 *
5330 * outputs:
5331 * zc_cookie		number of bytes read
5332 * zc_obj		zprop_errflags_t
5333 * zc_nvlist_dst{_size} error for each unapplied received property
5334 */
5335static int
5336zfs_ioc_recv(zfs_cmd_t *zc)
5337{
5338	dmu_replay_record_t begin_record;
5339	nvlist_t *errors = NULL;
5340	nvlist_t *recvdprops = NULL;
5341	nvlist_t *localprops = NULL;
5342	const char *origin = NULL;
5343	char *tosnap;
5344	char tofs[ZFS_MAX_DATASET_NAME_LEN];
5345	int error = 0;
5346
5347	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5348	    strchr(zc->zc_value, '@') == NULL ||
5349	    strchr(zc->zc_value, '%') != NULL) {
5350		return (SET_ERROR(EINVAL));
5351	}
5352
5353	(void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5354	tosnap = strchr(tofs, '@');
5355	*tosnap++ = '\0';
5356
5357	if (zc->zc_nvlist_src != 0 &&
5358	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5359	    zc->zc_iflags, &recvdprops)) != 0) {
5360		goto out;
5361	}
5362
5363	if (zc->zc_nvlist_conf != 0 &&
5364	    (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5365	    zc->zc_iflags, &localprops)) != 0) {
5366		goto out;
5367	}
5368
5369	if (zc->zc_string[0])
5370		origin = zc->zc_string;
5371
5372	begin_record.drr_type = DRR_BEGIN;
5373	begin_record.drr_payloadlen = 0;
5374	begin_record.drr_u.drr_begin = zc->zc_begin_record;
5375
5376	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5377	    NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record,
5378	    &zc->zc_cookie, &zc->zc_obj, &errors);
5379
5380	/*
5381	 * Now that all props, initial and delayed, are set, report the prop
5382	 * errors to the caller.
5383	 */
5384	if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5385	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5386	    put_nvlist(zc, errors) != 0)) {
5387		/*
5388		 * Caller made zc->zc_nvlist_dst less than the minimum expected
5389		 * size or supplied an invalid address.
5390		 */
5391		error = SET_ERROR(EINVAL);
5392	}
5393
5394out:
5395	nvlist_free(errors);
5396	nvlist_free(recvdprops);
5397	nvlist_free(localprops);
5398
5399	return (error);
5400}
5401
5402/*
5403 * innvl: {
5404 *     "snapname" -> full name of the snapshot to create
5405 *     (optional) "props" -> received properties to set (nvlist)
5406 *     (optional) "localprops" -> override and exclude properties (nvlist)
5407 *     (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
5408 *     "begin_record" -> non-byteswapped dmu_replay_record_t
5409 *     "input_fd" -> file descriptor to read stream from (int32)
5410 *     (optional) "force" -> force flag (value ignored)
5411 *     (optional) "heal" -> use send stream to heal data corruption
5412 *     (optional) "resumable" -> resumable flag (value ignored)
5413 *     (optional) "cleanup_fd" -> unused
5414 *     (optional) "action_handle" -> unused
5415 *     (optional) "hidden_args" -> { "wkeydata" -> value }
5416 * }
5417 *
5418 * outnvl: {
5419 *     "read_bytes" -> number of bytes read
5420 *     "error_flags" -> zprop_errflags_t
5421 *     "errors" -> error for each unapplied received property (nvlist)
5422 * }
5423 */
/*
 * Input keys for the nvlist-based receive ioctl: key name, expected nvpair
 * data type, and flags.  Entries flagged ZK_OPTIONAL correspond to the keys
 * marked "(optional)" in the innvl description above; the rest carry 0.
 */
static const zfs_ioc_key_t zfs_keys_recv_new[] = {
	{"snapname",		DATA_TYPE_STRING,	0},
	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
	{"input_fd",		DATA_TYPE_INT32,	0},
	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"heal",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
5438
5439static int
5440zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
5441{
5442	dmu_replay_record_t *begin_record;
5443	uint_t begin_record_size;
5444	nvlist_t *errors = NULL;
5445	nvlist_t *recvprops = NULL;
5446	nvlist_t *localprops = NULL;
5447	nvlist_t *hidden_args = NULL;
5448	const char *snapname;
5449	const char *origin = NULL;
5450	char *tosnap;
5451	char tofs[ZFS_MAX_DATASET_NAME_LEN];
5452	boolean_t force;
5453	boolean_t heal;
5454	boolean_t resumable;
5455	uint64_t read_bytes = 0;
5456	uint64_t errflags = 0;
5457	int input_fd = -1;
5458	int error;
5459
5460	snapname = fnvlist_lookup_string(innvl, "snapname");
5461
5462	if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
5463	    strchr(snapname, '@') == NULL ||
5464	    strchr(snapname, '%') != NULL) {
5465		return (SET_ERROR(EINVAL));
5466	}
5467
5468	(void) strlcpy(tofs, snapname, sizeof (tofs));
5469	tosnap = strchr(tofs, '@');
5470	*tosnap++ = '\0';
5471
5472	error = nvlist_lookup_string(innvl, "origin", &origin);
5473	if (error && error != ENOENT)
5474		return (error);
5475
5476	error = nvlist_lookup_byte_array(innvl, "begin_record",
5477	    (uchar_t **)&begin_record, &begin_record_size);
5478	if (error != 0 || begin_record_size != sizeof (*begin_record))
5479		return (SET_ERROR(EINVAL));
5480
5481	input_fd = fnvlist_lookup_int32(innvl, "input_fd");
5482
5483	force = nvlist_exists(innvl, "force");
5484	heal = nvlist_exists(innvl, "heal");
5485	resumable = nvlist_exists(innvl, "resumable");
5486
5487	/* we still use "props" here for backwards compatibility */
5488	error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
5489	if (error && error != ENOENT)
5490		goto out;
5491
5492	error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
5493	if (error && error != ENOENT)
5494		goto out;
5495
5496	error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
5497	if (error && error != ENOENT)
5498		goto out;
5499
5500	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
5501	    hidden_args, force, heal, resumable, input_fd, begin_record,
5502	    &read_bytes, &errflags, &errors);
5503
5504	fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
5505	fnvlist_add_uint64(outnvl, "error_flags", errflags);
5506	fnvlist_add_nvlist(outnvl, "errors", errors);
5507
5508out:
5509	nvlist_free(errors);
5510	nvlist_free(recvprops);
5511	nvlist_free(localprops);
5512	nvlist_free(hidden_args);
5513
5514	return (error);
5515}
5516
/*
 * When stack space is limited (HAVE_LARGE_STACKS is not defined), we write
 * replication stream data to the target on a separate taskq thread, to make
 * sure there's enough stack space.  USE_SEND_TASKQ selects that path in the
 * dump_bytes*() helpers below.
 */
#ifndef HAVE_LARGE_STACKS
#define	USE_SEND_TASKQ	1
#endif
5524
/* One write request, filled in by dump_bytes() for dump_bytes_cb(). */
typedef struct dump_bytes_io {
	zfs_file_t	*dbi_fp;	/* file the data is written to */
	caddr_t		dbi_buf;	/* data to write */
	int		dbi_len;	/* length passed to zfs_file_write() */
	int		dbi_err;	/* zfs_file_write() return value */
} dump_bytes_io_t;
5531
5532static void
5533dump_bytes_cb(void *arg)
5534{
5535	dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
5536	zfs_file_t *fp;
5537	caddr_t buf;
5538
5539	fp = dbi->dbi_fp;
5540	buf = dbi->dbi_buf;
5541
5542	dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
5543}
5544
/* State set up by dump_bytes_init() and torn down by dump_bytes_fini(). */
typedef struct dump_bytes_arg {
	zfs_file_t	*dba_fp;	/* output file from zfs_file_get() */
#ifdef USE_SEND_TASKQ
	taskq_t		*dba_tq;	/* "z_send" taskq running the writes */
	taskq_ent_t	dba_tqent;	/* entry reused for every dispatch */
#endif
} dump_bytes_arg_t;
5552
5553static int
5554dump_bytes(objset_t *os, void *buf, int len, void *arg)
5555{
5556	dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg;
5557	dump_bytes_io_t dbi;
5558
5559	dbi.dbi_fp = dba->dba_fp;
5560	dbi.dbi_buf = buf;
5561	dbi.dbi_len = len;
5562
5563#ifdef USE_SEND_TASKQ
5564	taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP,
5565	    &dba->dba_tqent);
5566	taskq_wait(dba->dba_tq);
5567#else
5568	dump_bytes_cb(&dbi);
5569#endif
5570
5571	return (dbi.dbi_err);
5572}
5573
5574static int
5575dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out)
5576{
5577	zfs_file_t *fp = zfs_file_get(fd);
5578	if (fp == NULL)
5579		return (SET_ERROR(EBADF));
5580
5581	dba->dba_fp = fp;
5582#ifdef USE_SEND_TASKQ
5583	dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0);
5584	taskq_init_ent(&dba->dba_tqent);
5585#endif
5586
5587	memset(out, 0, sizeof (dmu_send_outparams_t));
5588	out->dso_outfunc = dump_bytes;
5589	out->dso_arg = dba;
5590	out->dso_dryrun = B_FALSE;
5591
5592	return (0);
5593}
5594
/*
 * Undo dump_bytes_init(): drop the file hold and, when the send taskq is in
 * use, destroy it.
 */
static void
dump_bytes_fini(dump_bytes_arg_t *dba)
{
	zfs_file_put(dba->dba_fp);
#ifdef USE_SEND_TASKQ
	taskq_destroy(dba->dba_tq);
#endif
}
5603
5604/*
5605 * inputs:
5606 * zc_name	name of snapshot to send
5607 * zc_cookie	file descriptor to send stream to
5608 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
5609 * zc_sendobj	objsetid of snapshot to send
5610 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
5611 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
5612 *		output size in zc_objset_type.
5613 * zc_flags	lzc_send_flags
5614 *
5615 * outputs:
5616 * zc_objset_type	estimated size, if zc_guid is set
5617 *
5618 * NOTE: This is no longer the preferred interface, any new functionality
5619 *	  should be added to zfs_ioc_send_new() instead.
5620 */
5621static int
5622zfs_ioc_send(zfs_cmd_t *zc)
5623{
5624	int error;
5625	offset_t off;
5626	boolean_t estimate = (zc->zc_guid != 0);
5627	boolean_t embedok = (zc->zc_flags & 0x1);
5628	boolean_t large_block_ok = (zc->zc_flags & 0x2);
5629	boolean_t compressok = (zc->zc_flags & 0x4);
5630	boolean_t rawok = (zc->zc_flags & 0x8);
5631	boolean_t savedok = (zc->zc_flags & 0x10);
5632
5633	if (zc->zc_obj != 0) {
5634		dsl_pool_t *dp;
5635		dsl_dataset_t *tosnap;
5636
5637		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5638		if (error != 0)
5639			return (error);
5640
5641		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5642		if (error != 0) {
5643			dsl_pool_rele(dp, FTAG);
5644			return (error);
5645		}
5646
5647		if (dsl_dir_is_clone(tosnap->ds_dir))
5648			zc->zc_fromobj =
5649			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5650		dsl_dataset_rele(tosnap, FTAG);
5651		dsl_pool_rele(dp, FTAG);
5652	}
5653
5654	if (estimate) {
5655		dsl_pool_t *dp;
5656		dsl_dataset_t *tosnap;
5657		dsl_dataset_t *fromsnap = NULL;
5658
5659		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5660		if (error != 0)
5661			return (error);
5662
5663		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
5664		    FTAG, &tosnap);
5665		if (error != 0) {
5666			dsl_pool_rele(dp, FTAG);
5667			return (error);
5668		}
5669
5670		if (zc->zc_fromobj != 0) {
5671			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5672			    FTAG, &fromsnap);
5673			if (error != 0) {
5674				dsl_dataset_rele(tosnap, FTAG);
5675				dsl_pool_rele(dp, FTAG);
5676				return (error);
5677			}
5678		}
5679
5680		error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
5681		    compressok || rawok, savedok, &zc->zc_objset_type);
5682
5683		if (fromsnap != NULL)
5684			dsl_dataset_rele(fromsnap, FTAG);
5685		dsl_dataset_rele(tosnap, FTAG);
5686		dsl_pool_rele(dp, FTAG);
5687	} else {
5688		dump_bytes_arg_t dba;
5689		dmu_send_outparams_t out;
5690		error = dump_bytes_init(&dba, zc->zc_cookie, &out);
5691		if (error)
5692			return (error);
5693
5694		off = zfs_file_off(dba.dba_fp);
5695		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5696		    zc->zc_fromobj, embedok, large_block_ok, compressok,
5697		    rawok, savedok, zc->zc_cookie, &off, &out);
5698
5699		dump_bytes_fini(&dba);
5700	}
5701	return (error);
5702}
5703
5704/*
5705 * inputs:
5706 * zc_name		name of snapshot on which to report progress
5707 * zc_cookie		file descriptor of send stream
5708 *
5709 * outputs:
5710 * zc_cookie		number of bytes written in send stream thus far
5711 * zc_objset_type	logical size of data traversed by send thus far
5712 */
5713static int
5714zfs_ioc_send_progress(zfs_cmd_t *zc)
5715{
5716	dsl_pool_t *dp;
5717	dsl_dataset_t *ds;
5718	dmu_sendstatus_t *dsp = NULL;
5719	int error;
5720
5721	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5722	if (error != 0)
5723		return (error);
5724
5725	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5726	if (error != 0) {
5727		dsl_pool_rele(dp, FTAG);
5728		return (error);
5729	}
5730
5731	mutex_enter(&ds->ds_sendstream_lock);
5732
5733	/*
5734	 * Iterate over all the send streams currently active on this dataset.
5735	 * If there's one which matches the specified file descriptor _and_ the
5736	 * stream was started by the current process, return the progress of
5737	 * that stream.
5738	 */
5739
5740	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5741	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
5742		if (dsp->dss_outfd == zc->zc_cookie &&
5743		    zfs_proc_is_caller(dsp->dss_proc))
5744			break;
5745	}
5746
5747	if (dsp != NULL) {
5748		zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
5749		    0, 0);
5750		/* This is the closest thing we have to atomic_read_64. */
5751		zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
5752	} else {
5753		error = SET_ERROR(ENOENT);
5754	}
5755
5756	mutex_exit(&ds->ds_sendstream_lock);
5757	dsl_dataset_rele(ds, FTAG);
5758	dsl_pool_rele(dp, FTAG);
5759	return (error);
5760}
5761
5762static int
5763zfs_ioc_inject_fault(zfs_cmd_t *zc)
5764{
5765	int id, error;
5766
5767	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5768	    &zc->zc_inject_record);
5769
5770	if (error == 0)
5771		zc->zc_guid = (uint64_t)id;
5772
5773	return (error);
5774}
5775
/*
 * Pass zc_guid, narrowed to int, to zio_clear_fault() and return its result.
 */
static int
zfs_ioc_clear_fault(zfs_cmd_t *zc)
{
	return (zio_clear_fault((int)zc->zc_guid));
}
5781
5782static int
5783zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5784{
5785	int id = (int)zc->zc_guid;
5786	int error;
5787
5788	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5789	    &zc->zc_inject_record);
5790
5791	zc->zc_guid = id;
5792
5793	return (error);
5794}
5795
5796static int
5797zfs_ioc_error_log(zfs_cmd_t *zc)
5798{
5799	spa_t *spa;
5800	int error;
5801
5802	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5803		return (error);
5804
5805	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5806	    &zc->zc_nvlist_dst_size);
5807
5808	spa_close(spa, FTAG);
5809
5810	return (error);
5811}
5812
/*
 * Handler for pool clear.
 *
 * inputs:
 * zc_name		name of the pool
 * zc_cookie		ZPOOL_NO_REWIND selects a plain spa_open()
 * zc_nvlist_src[_size]	rewind policy; required unless ZPOOL_NO_REWIND is set
 * zc_guid		guid of the vdev to clear, or 0 to pass NULL to
 *			vdev_clear()
 *
 * outputs:
 * any config returned by spa_open_rewind(), emitted through put_nvlist()
 */
static int
zfs_ioc_clear(zfs_cmd_t *zc)
{
	spa_t *spa;
	vdev_t *vd;
	int error;

	/*
	 * On zpool clear we also fix up missing slogs
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_lookup(zc->zc_name);
	if (spa == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(EIO));
	}
	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
		/* we need to let spa_open/spa_load clear the chains */
		spa_set_log_state(spa, SPA_LOG_CLEAR);
	}
	spa->spa_last_open_failed = 0;
	mutex_exit(&spa_namespace_lock);

	/*
	 * Open the pool: directly when ZPOOL_NO_REWIND is set, otherwise
	 * through spa_open_rewind() using the policy nvlist from the caller.
	 */
	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
		error = spa_open(zc->zc_name, &spa, FTAG);
	} else {
		nvlist_t *policy;
		nvlist_t *config = NULL;

		/* A rewind needs a policy, so a missing source is an error. */
		if (zc->zc_nvlist_src == 0)
			return (SET_ERROR(EINVAL));

		if ((error = get_nvlist(zc->zc_nvlist_src,
		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
			    policy, &config);
			if (config != NULL) {
				int err;

				/*
				 * A failure to hand back the config replaces
				 * the open result.
				 */
				if ((err = put_nvlist(zc, config)) != 0)
					error = err;
				nvlist_free(config);
			}
			nvlist_free(policy);
		}
	}

	if (error != 0)
		return (error);

	/*
	 * If multihost is enabled, resuming I/O is unsafe as another
	 * host may have imported the pool. Check for remote activity.
	 */
	if (spa_multihost(spa) && spa_suspended(spa) &&
	    spa_mmp_remote_host_activity(spa)) {
		spa_close(spa, FTAG);
		return (SET_ERROR(EREMOTEIO));
	}

	spa_vdev_state_enter(spa, SCL_NONE);

	/* zc_guid == 0 means no specific vdev: vdev_clear() gets NULL. */
	if (zc->zc_guid == 0) {
		vd = NULL;
	} else {
		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
		if (vd == NULL) {
			error = SET_ERROR(ENODEV);
			(void) spa_vdev_state_exit(spa, NULL, error);
			spa_close(spa, FTAG);
			return (error);
		}
	}

	vdev_clear(spa, vd);

	/* The root vdev is passed on exit only if the pool is not suspended. */
	(void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
	    NULL : spa->spa_root_vdev, 0);

	/*
	 * Resume any suspended I/Os.
	 */
	if (zio_resume(spa) != 0)
		error = SET_ERROR(EIO);

	spa_close(spa, FTAG);

	return (error);
}
5902
5903/*
5904 * Reopen all the vdevs associated with the pool.
5905 *
5906 * innvl: {
5907 *  "scrub_restart" -> when true and scrub is running, allow to restart
5908 *              scrub as the side effect of the reopen (boolean).
5909 * }
5910 *
5911 * outnvl is unused
5912 */
static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
	/* Optional; zfs_ioc_pool_reopen() assumes B_TRUE when it is absent. */
	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
};
5916
5917static int
5918zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
5919{
5920	(void) outnvl;
5921	spa_t *spa;
5922	int error;
5923	boolean_t rc, scrub_restart = B_TRUE;
5924
5925	if (innvl) {
5926		error = nvlist_lookup_boolean_value(innvl,
5927		    "scrub_restart", &rc);
5928		if (error == 0)
5929			scrub_restart = rc;
5930	}
5931
5932	error = spa_open(pool, &spa, FTAG);
5933	if (error != 0)
5934		return (error);
5935
5936	spa_vdev_state_enter(spa, SCL_NONE);
5937
5938	/*
5939	 * If the scrub_restart flag is B_FALSE and a scrub is already
5940	 * in progress then set spa_scrub_reopen flag to B_TRUE so that
5941	 * we don't restart the scrub as a side effect of the reopen.
5942	 * Otherwise, let vdev_open() decided if a resilver is required.
5943	 */
5944
5945	spa->spa_scrub_reopen = (!scrub_restart &&
5946	    dsl_scan_scrubbing(spa->spa_dsl_pool));
5947	vdev_reopen(spa->spa_root_vdev);
5948	spa->spa_scrub_reopen = B_FALSE;
5949
5950	(void) spa_vdev_state_exit(spa, NULL, 0);
5951	spa_close(spa, FTAG);
5952	return (0);
5953}
5954
5955/*
5956 * inputs:
5957 * zc_name	name of filesystem
5958 *
5959 * outputs:
5960 * zc_string	name of conflicting snapshot, if there is one
5961 */
5962static int
5963zfs_ioc_promote(zfs_cmd_t *zc)
5964{
5965	dsl_pool_t *dp;
5966	dsl_dataset_t *ds, *ods;
5967	char origin[ZFS_MAX_DATASET_NAME_LEN];
5968	char *cp;
5969	int error;
5970
5971	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5972	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5973	    strchr(zc->zc_name, '%'))
5974		return (SET_ERROR(EINVAL));
5975
5976	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5977	if (error != 0)
5978		return (error);
5979
5980	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5981	if (error != 0) {
5982		dsl_pool_rele(dp, FTAG);
5983		return (error);
5984	}
5985
5986	if (!dsl_dir_is_clone(ds->ds_dir)) {
5987		dsl_dataset_rele(ds, FTAG);
5988		dsl_pool_rele(dp, FTAG);
5989		return (SET_ERROR(EINVAL));
5990	}
5991
5992	error = dsl_dataset_hold_obj(dp,
5993	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5994	if (error != 0) {
5995		dsl_dataset_rele(ds, FTAG);
5996		dsl_pool_rele(dp, FTAG);
5997		return (error);
5998	}
5999
6000	dsl_dataset_name(ods, origin);
6001	dsl_dataset_rele(ods, FTAG);
6002	dsl_dataset_rele(ds, FTAG);
6003	dsl_pool_rele(dp, FTAG);
6004
6005	/*
6006	 * We don't need to unmount *all* the origin fs's snapshots, but
6007	 * it's easier.
6008	 */
6009	cp = strchr(origin, '@');
6010	if (cp)
6011		*cp = '\0';
6012	(void) dmu_objset_find(origin,
6013	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
6014	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
6015}
6016
6017/*
6018 * Retrieve a single {user|group|project}{used|quota}@... property.
6019 *
6020 * inputs:
6021 * zc_name	name of filesystem
6022 * zc_objset_type zfs_userquota_prop_t
6023 * zc_value	domain name (eg. "S-1-234-567-89")
6024 * zc_guid	RID/UID/GID
6025 *
6026 * outputs:
6027 * zc_cookie	property value
6028 */
6029static int
6030zfs_ioc_userspace_one(zfs_cmd_t *zc)
6031{
6032	zfsvfs_t *zfsvfs;
6033	int error;
6034
6035	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
6036		return (SET_ERROR(EINVAL));
6037
6038	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6039	if (error != 0)
6040		return (error);
6041
6042	error = zfs_userspace_one(zfsvfs,
6043	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
6044	zfsvfs_rele(zfsvfs, FTAG);
6045
6046	return (error);
6047}
6048
6049/*
6050 * inputs:
6051 * zc_name		name of filesystem
6052 * zc_cookie		zap cursor
6053 * zc_objset_type	zfs_userquota_prop_t
6054 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
6055 *
6056 * outputs:
6057 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
6058 * zc_cookie	zap cursor
6059 */
6060static int
6061zfs_ioc_userspace_many(zfs_cmd_t *zc)
6062{
6063	zfsvfs_t *zfsvfs;
6064	int bufsize = zc->zc_nvlist_dst_size;
6065
6066	if (bufsize <= 0)
6067		return (SET_ERROR(ENOMEM));
6068
6069	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6070	if (error != 0)
6071		return (error);
6072
6073	void *buf = vmem_alloc(bufsize, KM_SLEEP);
6074
6075	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
6076	    buf, &zc->zc_nvlist_dst_size);
6077
6078	if (error == 0) {
6079		error = xcopyout(buf,
6080		    (void *)(uintptr_t)zc->zc_nvlist_dst,
6081		    zc->zc_nvlist_dst_size);
6082	}
6083	vmem_free(buf, bufsize);
6084	zfsvfs_rele(zfsvfs, FTAG);
6085
6086	return (error);
6087}
6088
6089/*
6090 * inputs:
6091 * zc_name		name of filesystem
6092 *
6093 * outputs:
6094 * none
6095 */
static int
zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
{
	int error = 0;
	zfsvfs_t *zfsvfs;

	/*
	 * Two paths: when getzfsvfs() finds a zfsvfs for the name, work
	 * through it; otherwise hold the objset directly.
	 */
	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
			/*
			 * If userused is not enabled, it may be because the
			 * objset needs to be closed & reopened (to grow the
			 * objset_phys_t).  Suspend/resume the fs will do that.
			 */
			dsl_dataset_t *ds, *newds;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			if (error == 0) {
				dmu_objset_refresh_ownership(ds, &newds,
				    B_TRUE, zfsvfs);
				error = zfs_resume_fs(zfsvfs, newds);
			}
		}
		if (error == 0) {
			/*
			 * Start an upgrade only if none is recorded
			 * (os_upgrade_id == 0); either way wait on the
			 * recorded id below.
			 */
			mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
			if (zfsvfs->z_os->os_upgrade_id == 0) {
				/* clear potential error code and retry */
				zfsvfs->z_os->os_upgrade_status = 0;
				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);

				dsl_pool_config_enter(
				    dmu_objset_pool(zfsvfs->z_os), FTAG);
				dmu_objset_userspace_upgrade(zfsvfs->z_os);
				dsl_pool_config_exit(
				    dmu_objset_pool(zfsvfs->z_os), FTAG);
			} else {
				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
			}

			/* The upgrade's status becomes the ioctl result. */
			taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
			    zfsvfs->z_os->os_upgrade_id);
			error = zfsvfs->z_os->os_upgrade_status;
		}
		zfs_vfs_rele(zfsvfs);
	} else {
		objset_t *os;

		/* XXX kind of reading contents without owning */
		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
		if (error != 0)
			return (error);

		mutex_enter(&os->os_upgrade_lock);
		if (os->os_upgrade_id == 0) {
			/* clear potential error code and retry */
			os->os_upgrade_status = 0;
			mutex_exit(&os->os_upgrade_lock);

			dmu_objset_userspace_upgrade(os);
		} else {
			mutex_exit(&os->os_upgrade_lock);
		}

		/*
		 * The pool hold is dropped before waiting on the taskq; the
		 * dataset hold is kept until the status has been read.
		 */
		dsl_pool_rele(dmu_objset_pool(os), FTAG);

		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
		error = os->os_upgrade_status;

		dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
		    FTAG);
	}
	return (error);
}
6169
6170/*
6171 * inputs:
6172 * zc_name		name of filesystem
6173 *
6174 * outputs:
6175 * none
6176 */
6177static int
6178zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
6179{
6180	objset_t *os;
6181	int error;
6182
6183	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6184	if (error != 0)
6185		return (error);
6186
6187	if (dmu_objset_userobjspace_upgradable(os) ||
6188	    dmu_objset_projectquota_upgradable(os)) {
6189		mutex_enter(&os->os_upgrade_lock);
6190		if (os->os_upgrade_id == 0) {
6191			/* clear potential error code and retry */
6192			os->os_upgrade_status = 0;
6193			mutex_exit(&os->os_upgrade_lock);
6194
6195			dmu_objset_id_quota_upgrade(os);
6196		} else {
6197			mutex_exit(&os->os_upgrade_lock);
6198		}
6199
6200		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6201
6202		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6203		error = os->os_upgrade_status;
6204	} else {
6205		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6206	}
6207
6208	dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
6209
6210	return (error);
6211}
6212
/*
 * Stub: always fails with ENOSYS; zc is not examined.
 */
static int
zfs_ioc_share(zfs_cmd_t *zc)
{
	return (SET_ERROR(ENOSYS));
}
6218
6219/*
6220 * inputs:
6221 * zc_name		name of containing filesystem
6222 * zc_obj		object # beyond which we want next in-use object #
6223 *
6224 * outputs:
6225 * zc_obj		next in-use object #
6226 */
6227static int
6228zfs_ioc_next_obj(zfs_cmd_t *zc)
6229{
6230	objset_t *os = NULL;
6231	int error;
6232
6233	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
6234	if (error != 0)
6235		return (error);
6236
6237	error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
6238
6239	dmu_objset_rele(os, FTAG);
6240	return (error);
6241}
6242
6243/*
6244 * inputs:
6245 * zc_name		name of filesystem
6246 * zc_value		prefix name for snapshot
6247 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
6248 *
6249 * outputs:
6250 * zc_value		short name of new snapshot
6251 */
6252static int
6253zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
6254{
6255	char *snap_name;
6256	char *hold_name;
6257	minor_t minor;
6258
6259	zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
6260	if (fp == NULL)
6261		return (SET_ERROR(EBADF));
6262
6263	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
6264	    (u_longlong_t)ddi_get_lbolt64());
6265	hold_name = kmem_asprintf("%%%s", zc->zc_value);
6266
6267	int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
6268	    hold_name);
6269	if (error == 0)
6270		(void) strlcpy(zc->zc_value, snap_name,
6271		    sizeof (zc->zc_value));
6272	kmem_strfree(snap_name);
6273	kmem_strfree(hold_name);
6274	zfs_onexit_fd_rele(fp);
6275	return (error);
6276}
6277
6278/*
6279 * inputs:
6280 * zc_name		name of "to" snapshot
6281 * zc_value		name of "from" snapshot
6282 * zc_cookie		file descriptor to write diff data on
6283 *
6284 * outputs:
6285 * dmu_diff_record_t's to the file descriptor
6286 */
6287static int
6288zfs_ioc_diff(zfs_cmd_t *zc)
6289{
6290	zfs_file_t *fp;
6291	offset_t off;
6292	int error;
6293
6294	if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
6295		return (SET_ERROR(EBADF));
6296
6297	off = zfs_file_off(fp);
6298	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
6299
6300	zfs_file_put(fp);
6301
6302	return (error);
6303}
6304
/*
 * Stub: always fails with ENOTSUP; zc is not examined.
 */
static int
zfs_ioc_smb_acl(zfs_cmd_t *zc)
{
	return (SET_ERROR(ENOTSUP));
}
6310
6311/*
6312 * innvl: {
6313 *     "holds" -> { snapname -> holdname (string), ... }
6314 *     (optional) "cleanup_fd" -> fd (int32)
6315 * }
6316 *
6317 * outnvl: {
6318 *     snapname -> error value (int32)
6319 *     ...
6320 * }
6321 */
static const zfs_ioc_key_t zfs_keys_hold[] = {
	/* Looked up unconditionally by zfs_ioc_hold(). */
	{"holds",		DATA_TYPE_NVLIST,	0},
	/* When present, zfs_ioc_hold() resolves it to an onexit minor. */
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
};
6326
6327static int
6328zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6329{
6330	(void) pool;
6331	nvpair_t *pair;
6332	nvlist_t *holds;
6333	int cleanup_fd = -1;
6334	int error;
6335	minor_t minor = 0;
6336	zfs_file_t *fp = NULL;
6337
6338	holds = fnvlist_lookup_nvlist(args, "holds");
6339
6340	/* make sure the user didn't pass us any invalid (empty) tags */
6341	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6342	    pair = nvlist_next_nvpair(holds, pair)) {
6343		const char *htag;
6344
6345		error = nvpair_value_string(pair, &htag);
6346		if (error != 0)
6347			return (SET_ERROR(error));
6348
6349		if (strlen(htag) == 0)
6350			return (SET_ERROR(EINVAL));
6351	}
6352
6353	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6354		fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
6355		if (fp == NULL)
6356			return (SET_ERROR(EBADF));
6357	}
6358
6359	error = dsl_dataset_user_hold(holds, minor, errlist);
6360	if (fp != NULL) {
6361		ASSERT3U(minor, !=, 0);
6362		zfs_onexit_fd_rele(fp);
6363	}
6364	return (SET_ERROR(error));
6365}
6366
6367/*
6368 * innvl is not used.
6369 *
6370 * outnvl: {
6371 *    holdname -> time added (uint64 seconds since epoch)
6372 *    ...
6373 * }
6374 */
static const zfs_ioc_key_t zfs_keys_get_holds[] = {
	/* no nvl keys: zfs_ioc_get_holds() ignores its input nvlist */
};
6378
/*
 * Fill outnvl through dsl_dataset_get_holds() for the named snapshot and
 * return its result.  The input nvlist is not examined.
 */
static int
zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
{
	(void) args;
	return (dsl_dataset_get_holds(snapname, outnvl));
}
6385
6386/*
6387 * innvl: {
6388 *     snapname -> { holdname, ... }
6389 *     ...
6390 * }
6391 *
6392 * outnvl: {
6393 *     snapname -> error value (int32)
6394 *     ...
6395 * }
6396 */
static const zfs_ioc_key_t zfs_keys_release[] = {
	/*
	 * "<snapname>..." is a placeholder, not a literal key: per the
	 * comment above, the input is keyed by snapshot name.
	 * NOTE(review): presumably ZK_WILDCARDLIST makes the key checker
	 * accept any number of such nvlist entries -- confirm.
	 */
	{"<snapname>...",	DATA_TYPE_NVLIST,	ZK_WILDCARDLIST},
};
6400
/*
 * Pass the whole input nvlist to dsl_dataset_user_release(), which reports
 * through errlist, and return its result.  The pool name is not examined.
 */
static int
zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
{
	(void) pool;
	return (dsl_dataset_user_release(holds, errlist));
}
6407
6408/*
6409 * inputs:
6410 * zc_guid		flags (ZEVENT_NONBLOCK)
6411 * zc_cleanup_fd	zevent file descriptor
6412 *
6413 * outputs:
6414 * zc_nvlist_dst	next nvlist event
6415 * zc_cookie		dropped events since last get
6416 */
6417static int
6418zfs_ioc_events_next(zfs_cmd_t *zc)
6419{
6420	zfs_zevent_t *ze;
6421	nvlist_t *event = NULL;
6422	minor_t minor;
6423	uint64_t dropped = 0;
6424	int error;
6425
6426	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6427	if (fp == NULL)
6428		return (SET_ERROR(EBADF));
6429
6430	do {
6431		error = zfs_zevent_next(ze, &event,
6432		    &zc->zc_nvlist_dst_size, &dropped);
6433		if (event != NULL) {
6434			zc->zc_cookie = dropped;
6435			error = put_nvlist(zc, event);
6436			nvlist_free(event);
6437		}
6438
6439		if (zc->zc_guid & ZEVENT_NONBLOCK)
6440			break;
6441
6442		if ((error == 0) || (error != ENOENT))
6443			break;
6444
6445		error = zfs_zevent_wait(ze);
6446		if (error != 0)
6447			break;
6448	} while (1);
6449
6450	zfs_zevent_fd_rele(fp);
6451
6452	return (error);
6453}
6454
6455/*
6456 * outputs:
6457 * zc_cookie		cleared events count
6458 */
6459static int
6460zfs_ioc_events_clear(zfs_cmd_t *zc)
6461{
6462	uint_t count;
6463
6464	zfs_zevent_drain_all(&count);
6465	zc->zc_cookie = count;
6466
6467	return (0);
6468}
6469
6470/*
6471 * inputs:
6472 * zc_guid		eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
 * zc_cleanup_fd	zevent file descriptor
6474 */
6475static int
6476zfs_ioc_events_seek(zfs_cmd_t *zc)
6477{
6478	zfs_zevent_t *ze;
6479	minor_t minor;
6480	int error;
6481
6482	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6483	if (fp == NULL)
6484		return (SET_ERROR(EBADF));
6485
6486	error = zfs_zevent_seek(ze, zc->zc_guid);
6487	zfs_zevent_fd_rele(fp);
6488
6489	return (error);
6490}
6491
6492/*
6493 * inputs:
6494 * zc_name		name of later filesystem or snapshot
6495 * zc_value		full name of old snapshot or bookmark
6496 *
6497 * outputs:
6498 * zc_cookie		space in bytes
6499 * zc_objset_type	compressed space in bytes
6500 * zc_perm_action	uncompressed space in bytes
6501 */
6502static int
6503zfs_ioc_space_written(zfs_cmd_t *zc)
6504{
6505	int error;
6506	dsl_pool_t *dp;
6507	dsl_dataset_t *new;
6508
6509	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6510	if (error != 0)
6511		return (error);
6512	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6513	if (error != 0) {
6514		dsl_pool_rele(dp, FTAG);
6515		return (error);
6516	}
6517	if (strchr(zc->zc_value, '#') != NULL) {
6518		zfs_bookmark_phys_t bmp;
6519		error = dsl_bookmark_lookup(dp, zc->zc_value,
6520		    new, &bmp);
6521		if (error == 0) {
6522			error = dsl_dataset_space_written_bookmark(&bmp, new,
6523			    &zc->zc_cookie,
6524			    &zc->zc_objset_type, &zc->zc_perm_action);
6525		}
6526	} else {
6527		dsl_dataset_t *old;
6528		error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6529
6530		if (error == 0) {
6531			error = dsl_dataset_space_written(old, new,
6532			    &zc->zc_cookie,
6533			    &zc->zc_objset_type, &zc->zc_perm_action);
6534			dsl_dataset_rele(old, FTAG);
6535		}
6536	}
6537	dsl_dataset_rele(new, FTAG);
6538	dsl_pool_rele(dp, FTAG);
6539	return (error);
6540}
6541
6542/*
6543 * innvl: {
6544 *     "firstsnap" -> snapshot name
6545 * }
6546 *
6547 * outnvl: {
6548 *     "used" -> space in bytes
6549 *     "compressed" -> compressed space in bytes
6550 *     "uncompressed" -> uncompressed space in bytes
6551 * }
6552 */
static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
	/* Looked up unconditionally by zfs_ioc_space_snaps(). */
	{"firstsnap",	DATA_TYPE_STRING,	0},
};
6556
6557static int
6558zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6559{
6560	int error;
6561	dsl_pool_t *dp;
6562	dsl_dataset_t *new, *old;
6563	const char *firstsnap;
6564	uint64_t used, comp, uncomp;
6565
6566	firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
6567
6568	error = dsl_pool_hold(lastsnap, FTAG, &dp);
6569	if (error != 0)
6570		return (error);
6571
6572	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6573	if (error == 0 && !new->ds_is_snapshot) {
6574		dsl_dataset_rele(new, FTAG);
6575		error = SET_ERROR(EINVAL);
6576	}
6577	if (error != 0) {
6578		dsl_pool_rele(dp, FTAG);
6579		return (error);
6580	}
6581	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6582	if (error == 0 && !old->ds_is_snapshot) {
6583		dsl_dataset_rele(old, FTAG);
6584		error = SET_ERROR(EINVAL);
6585	}
6586	if (error != 0) {
6587		dsl_dataset_rele(new, FTAG);
6588		dsl_pool_rele(dp, FTAG);
6589		return (error);
6590	}
6591
6592	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6593	dsl_dataset_rele(old, FTAG);
6594	dsl_dataset_rele(new, FTAG);
6595	dsl_pool_rele(dp, FTAG);
6596	fnvlist_add_uint64(outnvl, "used", used);
6597	fnvlist_add_uint64(outnvl, "compressed", comp);
6598	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6599	return (error);
6600}
6601
6602/*
6603 * innvl: {
6604 *     "fd" -> file descriptor to write stream to (int32)
6605 *     (optional) "fromsnap" -> full snap name to send an incremental from
6606 *     (optional) "largeblockok" -> (value ignored)
6607 *         indicates that blocks > 128KB are permitted
6608 *     (optional) "embedok" -> (value ignored)
6609 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6610 *     (optional) "compressok" -> (value ignored)
6611 *         presence indicates compressed DRR_WRITE records are permitted
6612 *     (optional) "rawok" -> (value ignored)
6613 *         presence indicates raw encrypted records should be used.
6614 *     (optional) "savedok" -> (value ignored)
6615 *         presence indicates we should send a partially received snapshot
6616 *     (optional) "resume_object" and "resume_offset" -> (uint64)
6617 *         if present, resume send stream from specified object and offset.
6618 *     (optional) "redactbook" -> (string)
6619 *         if present, use this bookmark's redaction list to generate a redacted
6620 *         send stream
6621 * }
6622 *
6623 * outnvl is unused
6624 */
static const zfs_ioc_key_t zfs_keys_send_new[] = {
	/* "fd" is the only entry zfs_ioc_send_new() looks up unconditionally. */
	{"fd",			DATA_TYPE_INT32,	0},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	/* The five *ok keys are tested for presence only. */
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"savedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	/* Both resume values default to 0 in the handler when absent. */
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
};
6637
6638static int
6639zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6640{
6641	(void) outnvl;
6642	int error;
6643	offset_t off;
6644	const char *fromname = NULL;
6645	int fd;
6646	boolean_t largeblockok;
6647	boolean_t embedok;
6648	boolean_t compressok;
6649	boolean_t rawok;
6650	boolean_t savedok;
6651	uint64_t resumeobj = 0;
6652	uint64_t resumeoff = 0;
6653	const char *redactbook = NULL;
6654
6655	fd = fnvlist_lookup_int32(innvl, "fd");
6656
6657	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6658
6659	largeblockok = nvlist_exists(innvl, "largeblockok");
6660	embedok = nvlist_exists(innvl, "embedok");
6661	compressok = nvlist_exists(innvl, "compressok");
6662	rawok = nvlist_exists(innvl, "rawok");
6663	savedok = nvlist_exists(innvl, "savedok");
6664
6665	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6666	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6667
6668	(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
6669
6670	dump_bytes_arg_t dba;
6671	dmu_send_outparams_t out;
6672	error = dump_bytes_init(&dba, fd, &out);
6673	if (error)
6674		return (error);
6675
6676	off = zfs_file_off(dba.dba_fp);
6677	error = dmu_send(snapname, fromname, embedok, largeblockok,
6678	    compressok, rawok, savedok, resumeobj, resumeoff,
6679	    redactbook, fd, &off, &out);
6680
6681	dump_bytes_fini(&dba);
6682
6683	return (error);
6684}
6685
6686static int
6687send_space_sum(objset_t *os, void *buf, int len, void *arg)
6688{
6689	(void) os, (void) buf;
6690	uint64_t *size = arg;
6691
6692	*size += len;
6693	return (0);
6694}
6695
6696/*
6697 * Determine approximately how large a zfs send stream will be -- the number
6698 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6699 *
6700 * innvl: {
6701 *     (optional) "from" -> full snap or bookmark name to send an incremental
6702 *                          from
6703 *     (optional) "largeblockok" -> (value ignored)
6704 *         indicates that blocks > 128KB are permitted
6705 *     (optional) "embedok" -> (value ignored)
6706 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6707 *     (optional) "compressok" -> (value ignored)
6708 *         presence indicates compressed DRR_WRITE records are permitted
6709 *     (optional) "rawok" -> (value ignored)
6710 *         presence indicates raw encrypted records should be used.
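 *     (optional) "resume_object" and "resume_offset" -> (uint64)
 *         if present, resume send stream from specified object and offset.
 *     (optional) "bytes" -> (uint64)
 *         if present, number of bytes subtracted from the fast estimate
 *         (not applied when a full dry-run estimate is performed).
 *     (optional) "redactbook" -> (string)
 *         if present, estimate a send stream that uses this bookmark's
 *         redaction list; this always takes the full dry-run estimate path.
 *     (optional) "fd" -> file descriptor to use as a cookie for progress
 *         tracking (int32)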
6715 * }
6716 *
6717 * outnvl: {
6718 *     "space" -> bytes of space (uint64)
6719 * }
6720 */
static const zfs_ioc_key_t zfs_keys_send_space[] = {
	/* Every key is optional; an empty innvl is acceptable. */
	{"from",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	/*
	 * NOTE(review): "fromsnap" is accepted here, but
	 * zfs_ioc_send_space() only ever looks up "from".
	 */
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	/* Flag keys: the handler only tests for their presence. */
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	/*
	 * NOTE(review): zfs_ioc_send_space() also tests for "savedok",
	 * which has no entry in this table -- confirm whether that is
	 * intentional.
	 */
	{"fd",			DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	/* Subtracted from the fast estimate by the handler. */
	{"bytes",		DATA_TYPE_UINT64,	ZK_OPTIONAL},
};
6734
6735static int
6736zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6737{
6738	dsl_pool_t *dp;
6739	dsl_dataset_t *tosnap;
6740	dsl_dataset_t *fromsnap = NULL;
6741	int error;
6742	const char *fromname = NULL;
6743	const char *redactlist_book = NULL;
6744	boolean_t largeblockok;
6745	boolean_t embedok;
6746	boolean_t compressok;
6747	boolean_t rawok;
6748	boolean_t savedok;
6749	uint64_t space = 0;
6750	boolean_t full_estimate = B_FALSE;
6751	uint64_t resumeobj = 0;
6752	uint64_t resumeoff = 0;
6753	uint64_t resume_bytes = 0;
6754	int32_t fd = -1;
6755	zfs_bookmark_phys_t zbm = {0};
6756
6757	error = dsl_pool_hold(snapname, FTAG, &dp);
6758	if (error != 0)
6759		return (error);
6760
6761	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6762	if (error != 0) {
6763		dsl_pool_rele(dp, FTAG);
6764		return (error);
6765	}
6766	(void) nvlist_lookup_int32(innvl, "fd", &fd);
6767
6768	largeblockok = nvlist_exists(innvl, "largeblockok");
6769	embedok = nvlist_exists(innvl, "embedok");
6770	compressok = nvlist_exists(innvl, "compressok");
6771	rawok = nvlist_exists(innvl, "rawok");
6772	savedok = nvlist_exists(innvl, "savedok");
6773	boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
6774	boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
6775	    &redactlist_book) == 0);
6776
6777	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6778	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6779	(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
6780
6781	if (altbook) {
6782		full_estimate = B_TRUE;
6783	} else if (from) {
6784		if (strchr(fromname, '#')) {
6785			error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
6786
6787			/*
6788			 * dsl_bookmark_lookup() will fail with EXDEV if
6789			 * the from-bookmark and tosnap are at the same txg.
6790			 * However, it's valid to do a send (and therefore,
6791			 * a send estimate) from and to the same time point,
6792			 * if the bookmark is redacted (the incremental send
6793			 * can change what's redacted on the target).  In
6794			 * this case, dsl_bookmark_lookup() fills in zbm
6795			 * but returns EXDEV.  Ignore this error.
6796			 */
6797			if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
6798			    zbm.zbm_guid ==
6799			    dsl_dataset_phys(tosnap)->ds_guid)
6800				error = 0;
6801
6802			if (error != 0) {
6803				dsl_dataset_rele(tosnap, FTAG);
6804				dsl_pool_rele(dp, FTAG);
6805				return (error);
6806			}
6807			if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
6808			    ZBM_FLAG_HAS_FBN)) {
6809				full_estimate = B_TRUE;
6810			}
6811		} else if (strchr(fromname, '@')) {
6812			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6813			if (error != 0) {
6814				dsl_dataset_rele(tosnap, FTAG);
6815				dsl_pool_rele(dp, FTAG);
6816				return (error);
6817			}
6818
6819			if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
6820				full_estimate = B_TRUE;
6821				dsl_dataset_rele(fromsnap, FTAG);
6822			}
6823		} else {
6824			/*
6825			 * from is not properly formatted as a snapshot or
6826			 * bookmark
6827			 */
6828			dsl_dataset_rele(tosnap, FTAG);
6829			dsl_pool_rele(dp, FTAG);
6830			return (SET_ERROR(EINVAL));
6831		}
6832	}
6833
6834	if (full_estimate) {
6835		dmu_send_outparams_t out = {0};
6836		offset_t off = 0;
6837		out.dso_outfunc = send_space_sum;
6838		out.dso_arg = &space;
6839		out.dso_dryrun = B_TRUE;
6840		/*
6841		 * We have to release these holds so dmu_send can take them.  It
6842		 * will do all the error checking we need.
6843		 */
6844		dsl_dataset_rele(tosnap, FTAG);
6845		dsl_pool_rele(dp, FTAG);
6846		error = dmu_send(snapname, fromname, embedok, largeblockok,
6847		    compressok, rawok, savedok, resumeobj, resumeoff,
6848		    redactlist_book, fd, &off, &out);
6849	} else {
6850		error = dmu_send_estimate_fast(tosnap, fromsnap,
6851		    (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
6852		    compressok || rawok, savedok, &space);
6853		space -= resume_bytes;
6854		if (fromsnap != NULL)
6855			dsl_dataset_rele(fromsnap, FTAG);
6856		dsl_dataset_rele(tosnap, FTAG);
6857		dsl_pool_rele(dp, FTAG);
6858	}
6859
6860	fnvlist_add_uint64(outnvl, "space", space);
6861
6862	return (error);
6863}
6864
6865/*
6866 * Sync the currently open TXG to disk for the specified pool.
6867 * This is somewhat similar to 'zfs_sync()'.
6868 * For cases that do not result in error this ioctl will wait for
6869 * the currently open TXG to commit before returning back to the caller.
6870 *
6871 * innvl: {
6872 *  "force" -> when true, force uberblock update even if there is no dirty data.
6873 *             In addition this will cause the vdev configuration to be written
6874 *             out including updating the zpool cache file. (boolean_t)
6875 * }
6876 *
6877 * onvl is unused
6878 */
static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
	/* No ZK_OPTIONAL flag: "force" must be supplied as a boolean value. */
	{"force",	DATA_TYPE_BOOLEAN_VALUE,	0},
};
6882
6883static int
6884zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
6885{
6886	(void) onvl;
6887	int err;
6888	boolean_t rc, force = B_FALSE;
6889	spa_t *spa;
6890
6891	if ((err = spa_open(pool, &spa, FTAG)) != 0)
6892		return (err);
6893
6894	if (innvl) {
6895		err = nvlist_lookup_boolean_value(innvl, "force", &rc);
6896		if (err == 0)
6897			force = rc;
6898	}
6899
6900	if (force) {
6901		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
6902		vdev_config_dirty(spa->spa_root_vdev);
6903		spa_config_exit(spa, SCL_CONFIG, FTAG);
6904	}
6905	txg_wait_synced(spa_get_dsl(spa), 0);
6906
6907	spa_close(spa, FTAG);
6908
6909	return (0);
6910}
6911
6912/*
6913 * Load a user's wrapping key into the kernel.
6914 * innvl: {
6915 *     "hidden_args" -> { "wkeydata" -> value }
6916 *         raw uint8_t array of encryption wrapping key data (32 bytes)
 *     (optional) "noop" -> (value ignored)
 *         presence indicates the key should only be verified, not loaded
6919 * }
6920 */
static const zfs_ioc_key_t zfs_keys_load_key[] = {
	/* Required: nested nvlist fetched via ZPOOL_HIDDEN_ARGS by the handler. */
	{"hidden_args",	DATA_TYPE_NVLIST,	0},
	/* Optional flag: only its presence is tested. */
	{"noop",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
};
6925
6926static int
6927zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6928{
6929	(void) outnvl;
6930	int ret;
6931	dsl_crypto_params_t *dcp = NULL;
6932	nvlist_t *hidden_args;
6933	boolean_t noop = nvlist_exists(innvl, "noop");
6934
6935	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6936		ret = SET_ERROR(EINVAL);
6937		goto error;
6938	}
6939
6940	hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
6941
6942	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
6943	    hidden_args, &dcp);
6944	if (ret != 0)
6945		goto error;
6946
6947	ret = spa_keystore_load_wkey(dsname, dcp, noop);
6948	if (ret != 0)
6949		goto error;
6950
6951	dsl_crypto_params_free(dcp, noop);
6952
6953	return (0);
6954
6955error:
6956	dsl_crypto_params_free(dcp, B_TRUE);
6957	return (ret);
6958}
6959
6960/*
6961 * Unload a user's wrapping key from the kernel.
6962 * Both innvl and outnvl are unused.
6963 */
/* Deliberately empty: the unload-key handler ignores innvl entirely. */
static const zfs_ioc_key_t zfs_keys_unload_key[] = {
	/* no nvl keys */
};
6967
6968static int
6969zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6970{
6971	(void) innvl, (void) outnvl;
6972	int ret = 0;
6973
6974	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6975		ret = (SET_ERROR(EINVAL));
6976		goto out;
6977	}
6978
6979	ret = spa_keystore_unload_wkey(dsname);
6980	if (ret != 0)
6981		goto out;
6982
6983out:
6984	return (ret);
6985}
6986
6987/*
6988 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
6989 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
6990 * here to change how the key is derived in userspace.
6991 *
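 * innvl: {
 *    "crypt_cmd" (optional) -> (uint64)
 *         command passed on to dsl_crypto_params_create_nvlist(); defaults
 *         to DCP_CMD_NONE when absent
 *    "hidden_args" (optional) -> { "wkeydata" -> value }
 *         raw uint8_t array of new encryption wrapping key data (32 bytes)
 *    "props" (optional) -> { prop -> value }
 * }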
6997 *
6998 * outnvl is unused
6999 */
static const zfs_ioc_key_t zfs_keys_change_key[] = {
	/* All three keys are optional; the handler supplies defaults. */
	{"crypt_cmd",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
7005
7006static int
7007zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7008{
7009	(void) outnvl;
7010	int ret;
7011	uint64_t cmd = DCP_CMD_NONE;
7012	dsl_crypto_params_t *dcp = NULL;
7013	nvlist_t *args = NULL, *hidden_args = NULL;
7014
7015	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7016		ret = (SET_ERROR(EINVAL));
7017		goto error;
7018	}
7019
7020	(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
7021	(void) nvlist_lookup_nvlist(innvl, "props", &args);
7022	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
7023
7024	ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
7025	if (ret != 0)
7026		goto error;
7027
7028	ret = spa_keystore_change_key(dsname, dcp);
7029	if (ret != 0)
7030		goto error;
7031
7032	dsl_crypto_params_free(dcp, B_FALSE);
7033
7034	return (0);
7035
7036error:
7037	dsl_crypto_params_free(dcp, B_TRUE);
7038	return (ret);
7039}
7040
/*
 * Dispatch table with one slot per ioctl number in the range
 * [ZFS_IOC_FIRST, ZFS_IOC_LAST), indexed by (ioc - ZFS_IOC_FIRST).
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
7042
7043static void
7044zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7045    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7046    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
7047{
7048	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7049
7050	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7051	ASSERT3U(ioc, <, ZFS_IOC_LAST);
7052	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
7053	ASSERT3P(vec->zvec_func, ==, NULL);
7054
7055	vec->zvec_legacy_func = func;
7056	vec->zvec_secpolicy = secpolicy;
7057	vec->zvec_namecheck = namecheck;
7058	vec->zvec_allow_log = log_history;
7059	vec->zvec_pool_check = pool_check;
7060}
7061
7062/*
7063 * See the block comment at the beginning of this file for details on
7064 * each argument to this function.
7065 */
7066void
7067zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
7068    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7069    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
7070    boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
7071{
7072	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7073
7074	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7075	ASSERT3U(ioc, <, ZFS_IOC_LAST);
7076	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
7077	ASSERT3P(vec->zvec_func, ==, NULL);
7078
7079	/* if we are logging, the name must be valid */
7080	ASSERT(!allow_log || namecheck != NO_NAME);
7081
7082	vec->zvec_name = name;
7083	vec->zvec_func = func;
7084	vec->zvec_secpolicy = secpolicy;
7085	vec->zvec_namecheck = namecheck;
7086	vec->zvec_pool_check = pool_check;
7087	vec->zvec_smush_outnvlist = smush_outnvlist;
7088	vec->zvec_allow_log = allow_log;
7089	vec->zvec_nvl_keys = nvl_keys;
7090	vec->zvec_nvl_key_count = num_keys;
7091}
7092
/*
 * Register a legacy ioctl with POOL_NAME as its namecheck; the logging
 * flag and pool checks are supplied by the caller.
 */
static void
zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
    zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    POOL_NAME, log_history, pool_check);
}
7101
/*
 * Register a legacy ioctl with DATASET_NAME as its namecheck and with
 * history logging disabled; pool checks are supplied by the caller.
 */
void
zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, pool_check);
}
7109
/*
 * Register a legacy pool ioctl using the fixed settings below:
 * zfs_secpolicy_config as the security policy, POOL_NAME as the namecheck,
 * history logging enabled, and both the suspended and read-only pool
 * checks.
 */
static void
zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
7116
/*
 * Register a legacy ioctl with NO_NAME as its namecheck, history logging
 * disabled and POOL_CHECK_NONE; only the security policy varies.
 */
static void
zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
}
7124
/*
 * Register a legacy dataset ioctl with DATASET_NAME as its namecheck,
 * history logging disabled and only the suspended-pool check, using a
 * caller-chosen security policy.
 */
static void
zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
}
7132
/*
 * Same as zfs_ioctl_register_dataset_read_secpolicy(), with
 * zfs_secpolicy_read as the security policy.
 */
static void
zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
	    zfs_secpolicy_read);
}
7139
/*
 * Register a legacy dataset ioctl with DATASET_NAME as its namecheck,
 * history logging enabled, and both the suspended and read-only pool
 * checks, using a caller-chosen security policy.
 */
static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
7147
/*
 * Populate zfs_ioc_vec with the ioctls registered from this file.  The
 * first group goes through zfs_ioctl_register() and supplies a key table
 * describing the expected input nvlist; the second group uses the legacy
 * registration helpers.  zfs_ioctl_init_os() is called last.
 */
static void
zfs_ioctl_init(void)
{
	/* IOCTLS registered with a name and an input nvlist key table */

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_create, ARRAY_SIZE(zfs_keys_create));

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));

	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_release, ARRAY_SIZE(zfs_keys_release));

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));

	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
	    ARRAY_SIZE(zfs_keys_get_bookmark_props));

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_bookmarks,
	    ARRAY_SIZE(zfs_keys_destroy_bookmarks));

	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
	    zfs_ioc_recv_new, zfs_secpolicy_recv, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
	/* wrapping-key management */
	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
	    zfs_ioc_load_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
	    zfs_ioc_change_key, zfs_secpolicy_change_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
	    B_TRUE, B_TRUE, zfs_keys_change_key,
	    ARRAY_SIZE(zfs_keys_change_key));

	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
	zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
	    B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE, zfs_keys_channel_program,
	    ARRAY_SIZE(zfs_keys_channel_program));

	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));

	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));

	zfs_ioctl_register("zpool_discard_checkpoint",
	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
	    zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_discard_checkpoint,
	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));

	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));

	zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
	    zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));

	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));

	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));

	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));

	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));

	zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS,
	    zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props,
	    ARRAY_SIZE(zfs_keys_vdev_get_props));

	zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS,
	    zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props));

	zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB,
	    zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_NONE, B_TRUE, B_TRUE,
	    zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	/* registered via the pool_modify helper (fixed policy and checks) */
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* registered via the pool_meta helper (NO_NAME, no pool checks) */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);

	/* dataset ioctls registered with zfs_secpolicy_read */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* same registration shape, each with its own security policy */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* dataset ioctls registered via the dataset_modify helper */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* dataset ioctls registered with history logging disabled */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

	/* event ioctls: NO_NAME, not logged, no pool checks */
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);

	/* NOTE(review): presumably platform-specific registrations -- confirm */
	zfs_ioctl_init_os();
}
7471
7472/*
7473 * Verify that for non-legacy ioctls the input nvlist
7474 * pairs match against the expected input.
7475 *
7476 * Possible errors are:
7477 * ZFS_ERR_IOC_ARG_UNAVAIL	An unrecognized nvpair was encountered
7478 * ZFS_ERR_IOC_ARG_REQUIRED	A required nvpair is missing
7479 * ZFS_ERR_IOC_ARG_BADTYPE	Invalid type for nvpair
7480 */
7481static int
7482zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
7483{
7484	const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
7485	boolean_t required_keys_found = B_FALSE;
7486
7487	/*
7488	 * examine each input pair
7489	 */
7490	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
7491	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
7492		const char *name = nvpair_name(pair);
7493		data_type_t type = nvpair_type(pair);
7494		boolean_t identified = B_FALSE;
7495
7496		/*
7497		 * check pair against the documented names and type
7498		 */
7499		for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7500			/* if not a wild card name, check for an exact match */
7501			if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
7502			    strcmp(nvl_keys[k].zkey_name, name) != 0)
7503				continue;
7504
7505			identified = B_TRUE;
7506
7507			if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
7508			    nvl_keys[k].zkey_type != type) {
7509				return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
7510			}
7511
7512			if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7513				continue;
7514
7515			required_keys_found = B_TRUE;
7516			break;
7517		}
7518
7519		/* allow an 'optional' key, everything else is invalid */
7520		if (!identified &&
7521		    (strcmp(name, "optional") != 0 ||
7522		    type != DATA_TYPE_NVLIST)) {
7523			return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
7524		}
7525	}
7526
7527	/* verify that all required keys were found */
7528	for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7529		if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7530			continue;
7531
7532		if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
7533			/* at least one non-optional key is expected here */
7534			if (!required_keys_found)
7535				return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7536			continue;
7537		}
7538
7539		if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
7540			return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7541	}
7542
7543	return (0);
7544}
7545
7546static int
7547pool_status_check(const char *name, zfs_ioc_namecheck_t type,
7548    zfs_ioc_poolcheck_t check)
7549{
7550	spa_t *spa;
7551	int error;
7552
7553	ASSERT(type == POOL_NAME || type == DATASET_NAME ||
7554	    type == ENTITY_NAME);
7555
7556	if (check & POOL_CHECK_NONE)
7557		return (0);
7558
7559	error = spa_open(name, &spa, FTAG);
7560	if (error == 0) {
7561		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
7562			error = SET_ERROR(EAGAIN);
7563		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
7564			error = SET_ERROR(EROFS);
7565		spa_close(spa, FTAG);
7566	}
7567	return (error);
7568}
7569
7570int
7571zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
7572{
7573	zfsdev_state_t *zs, *fpd;
7574
7575	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
7576
7577	fpd = zfs_file_private(fp);
7578	if (fpd == NULL)
7579		return (SET_ERROR(EBADF));
7580
7581	mutex_enter(&zfsdev_state_lock);
7582
7583	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
7584
7585		if (zs->zs_minor == -1)
7586			continue;
7587
7588		if (fpd == zs) {
7589			*minorp = fpd->zs_minor;
7590			mutex_exit(&zfsdev_state_lock);
7591			return (0);
7592		}
7593	}
7594
7595	mutex_exit(&zfsdev_state_lock);
7596
7597	return (SET_ERROR(EBADF));
7598}
7599
7600void *
7601zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
7602{
7603	zfsdev_state_t *zs;
7604
7605	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
7606		if (zs->zs_minor == minor) {
7607			membar_consumer();
7608			switch (which) {
7609			case ZST_ONEXIT:
7610				return (zs->zs_onexit);
7611			case ZST_ZEVENT:
7612				return (zs->zs_zevent);
7613			case ZST_ALL:
7614				return (zs);
7615			}
7616		}
7617	}
7618
7619	return (NULL);
7620}
7621
7622/*
7623 * Find a free minor number.  The zfsdev_state_list is expected to
7624 * be short since it is only a list of currently open file handles.
7625 */
7626static minor_t
7627zfsdev_minor_alloc(void)
7628{
7629	static minor_t last_minor = 0;
7630	minor_t m;
7631
7632	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7633
7634	for (m = last_minor + 1; m != last_minor; m++) {
7635		if (m > ZFSDEV_MAX_MINOR)
7636			m = 1;
7637		if (zfsdev_get_state(m, ZST_ALL) == NULL) {
7638			last_minor = m;
7639			return (m);
7640		}
7641	}
7642
7643	return (0);
7644}
7645
7646int
7647zfsdev_state_init(void *priv)
7648{
7649	zfsdev_state_t *zs, *zsprev = NULL;
7650	minor_t minor;
7651	boolean_t newzs = B_FALSE;
7652
7653	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7654
7655	minor = zfsdev_minor_alloc();
7656	if (minor == 0)
7657		return (SET_ERROR(ENXIO));
7658
7659	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
7660		if (zs->zs_minor == -1)
7661			break;
7662		zsprev = zs;
7663	}
7664
7665	if (!zs) {
7666		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
7667		newzs = B_TRUE;
7668	}
7669
7670	zfsdev_private_set_state(priv, zs);
7671
7672	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
7673	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
7674
7675	/*
7676	 * In order to provide for lock-free concurrent read access
7677	 * to the minor list in zfsdev_get_state(), new entries
7678	 * must be completely written before linking them into the
7679	 * list whereas existing entries are already linked; the last
7680	 * operation must be updating zs_minor (from -1 to the new
7681	 * value).
7682	 */
7683	if (newzs) {
7684		zs->zs_minor = minor;
7685		membar_producer();
7686		zsprev->zs_next = zs;
7687	} else {
7688		membar_producer();
7689		zs->zs_minor = minor;
7690	}
7691
7692	return (0);
7693}
7694
7695void
7696zfsdev_state_destroy(void *priv)
7697{
7698	zfsdev_state_t *zs = zfsdev_private_get_state(priv);
7699
7700	ASSERT(zs != NULL);
7701	ASSERT3S(zs->zs_minor, >, 0);
7702
7703	/*
7704	 * The last reference to this zfsdev file descriptor is being dropped.
7705	 * We don't have to worry about lookup grabbing this state object, and
7706	 * zfsdev_state_init() will not try to reuse this object until it is
7707	 * invalidated by setting zs_minor to -1.  Invalidation must be done
7708	 * last, with a memory barrier to ensure ordering.  This lets us avoid
7709	 * taking the global zfsdev state lock around destruction.
7710	 */
7711	zfs_onexit_destroy(zs->zs_onexit);
7712	zfs_zevent_destroy(zs->zs_zevent);
7713	zs->zs_onexit = NULL;
7714	zs->zs_zevent = NULL;
7715	membar_producer();
7716	zs->zs_minor = -1;
7717}
7718
7719long
7720zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
7721{
7722	int error, cmd;
7723	const zfs_ioc_vec_t *vec;
7724	char *saved_poolname = NULL;
7725	uint64_t max_nvlist_src_size;
7726	size_t saved_poolname_len = 0;
7727	nvlist_t *innvl = NULL;
7728	fstrans_cookie_t cookie;
7729	hrtime_t start_time = gethrtime();
7730
7731	cmd = vecnum;
7732	error = 0;
7733	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
7734		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7735
7736	vec = &zfs_ioc_vec[vecnum];
7737
7738	/*
7739	 * The registered ioctl list may be sparse, verify that either
7740	 * a normal or legacy handler are registered.
7741	 */
7742	if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
7743		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7744
7745	zc->zc_iflags = flag & FKIOCTL;
7746	max_nvlist_src_size = zfs_max_nvlist_src_size_os();
7747	if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
7748		/*
7749		 * Make sure the user doesn't pass in an insane value for
7750		 * zc_nvlist_src_size.  We have to check, since we will end
7751		 * up allocating that much memory inside of get_nvlist().  This
7752		 * prevents a nefarious user from allocating tons of kernel
7753		 * memory.
7754		 *
7755		 * Also, we return EINVAL instead of ENOMEM here.  The reason
7756		 * being that returning ENOMEM from an ioctl() has a special
7757		 * connotation; that the user's size value is too small and
7758		 * needs to be expanded to hold the nvlist.  See
7759		 * zcmd_expand_dst_nvlist() for details.
7760		 */
7761		error = SET_ERROR(EINVAL);	/* User's size too big */
7762
7763	} else if (zc->zc_nvlist_src_size != 0) {
7764		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
7765		    zc->zc_iflags, &innvl);
7766		if (error != 0)
7767			goto out;
7768	}
7769
7770	/*
7771	 * Ensure that all pool/dataset names are valid before we pass down to
7772	 * the lower layers.
7773	 */
7774	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
7775	switch (vec->zvec_namecheck) {
7776	case POOL_NAME:
7777		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
7778			error = SET_ERROR(EINVAL);
7779		else
7780			error = pool_status_check(zc->zc_name,
7781			    vec->zvec_namecheck, vec->zvec_pool_check);
7782		break;
7783
7784	case DATASET_NAME:
7785		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
7786			error = SET_ERROR(EINVAL);
7787		else
7788			error = pool_status_check(zc->zc_name,
7789			    vec->zvec_namecheck, vec->zvec_pool_check);
7790		break;
7791
7792	case ENTITY_NAME:
7793		if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
7794			error = SET_ERROR(EINVAL);
7795		} else {
7796			error = pool_status_check(zc->zc_name,
7797			    vec->zvec_namecheck, vec->zvec_pool_check);
7798		}
7799		break;
7800
7801	case NO_NAME:
7802		break;
7803	}
7804	/*
7805	 * Ensure that all input pairs are valid before we pass them down
7806	 * to the lower layers.
7807	 *
7808	 * The vectored functions can use fnvlist_lookup_{type} for any
7809	 * required pairs since zfs_check_input_nvpairs() confirmed that
7810	 * they exist and are of the correct type.
7811	 */
7812	if (error == 0 && vec->zvec_func != NULL) {
7813		error = zfs_check_input_nvpairs(innvl, vec);
7814		if (error != 0)
7815			goto out;
7816	}
7817
7818	if (error == 0) {
7819		cookie = spl_fstrans_mark();
7820		error = vec->zvec_secpolicy(zc, innvl, CRED());
7821		spl_fstrans_unmark(cookie);
7822	}
7823
7824	if (error != 0)
7825		goto out;
7826
7827	/* legacy ioctls can modify zc_name */
7828	/*
7829	 * Can't use kmem_strdup() as we might truncate the string and
7830	 * kmem_strfree() would then free with incorrect size.
7831	 */
7832	saved_poolname_len = strlen(zc->zc_name) + 1;
7833	saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
7834
7835	strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
7836	saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
7837
7838	if (vec->zvec_func != NULL) {
7839		nvlist_t *outnvl;
7840		int puterror = 0;
7841		spa_t *spa;
7842		nvlist_t *lognv = NULL;
7843
7844		ASSERT(vec->zvec_legacy_func == NULL);
7845
7846		/*
7847		 * Add the innvl to the lognv before calling the func,
7848		 * in case the func changes the innvl.
7849		 */
7850		if (vec->zvec_allow_log) {
7851			lognv = fnvlist_alloc();
7852			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
7853			    vec->zvec_name);
7854			if (!nvlist_empty(innvl)) {
7855				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
7856				    innvl);
7857			}
7858		}
7859
7860		outnvl = fnvlist_alloc();
7861		cookie = spl_fstrans_mark();
7862		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
7863		spl_fstrans_unmark(cookie);
7864
7865		/*
7866		 * Some commands can partially execute, modify state, and still
7867		 * return an error.  In these cases, attempt to record what
7868		 * was modified.
7869		 */
7870		if ((error == 0 ||
7871		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
7872		    vec->zvec_allow_log &&
7873		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
7874			if (!nvlist_empty(outnvl)) {
7875				size_t out_size = fnvlist_size(outnvl);
7876				if (out_size > zfs_history_output_max) {
7877					fnvlist_add_int64(lognv,
7878					    ZPOOL_HIST_OUTPUT_SIZE, out_size);
7879				} else {
7880					fnvlist_add_nvlist(lognv,
7881					    ZPOOL_HIST_OUTPUT_NVL, outnvl);
7882				}
7883			}
7884			if (error != 0) {
7885				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
7886				    error);
7887			}
7888			fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
7889			    gethrtime() - start_time);
7890			(void) spa_history_log_nvl(spa, lognv);
7891			spa_close(spa, FTAG);
7892		}
7893		fnvlist_free(lognv);
7894
7895		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
7896			int smusherror = 0;
7897			if (vec->zvec_smush_outnvlist) {
7898				smusherror = nvlist_smush(outnvl,
7899				    zc->zc_nvlist_dst_size);
7900			}
7901			if (smusherror == 0)
7902				puterror = put_nvlist(zc, outnvl);
7903		}
7904
7905		if (puterror != 0)
7906			error = puterror;
7907
7908		nvlist_free(outnvl);
7909	} else {
7910		cookie = spl_fstrans_mark();
7911		error = vec->zvec_legacy_func(zc);
7912		spl_fstrans_unmark(cookie);
7913	}
7914
7915out:
7916	nvlist_free(innvl);
7917	if (error == 0 && vec->zvec_allow_log) {
7918		char *s = tsd_get(zfs_allow_log_key);
7919		if (s != NULL)
7920			kmem_strfree(s);
7921		(void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
7922	}
7923	if (saved_poolname != NULL)
7924		kmem_free(saved_poolname, saved_poolname_len);
7925
7926	return (error);
7927}
7928
7929int
7930zfs_kmod_init(void)
7931{
7932	int error;
7933
7934	if ((error = zvol_init()) != 0)
7935		return (error);
7936
7937	spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
7938	zfs_init();
7939
7940	zfs_ioctl_init();
7941
7942	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
7943	zfsdev_state_listhead.zs_minor = -1;
7944
7945	if ((error = zfsdev_attach()) != 0)
7946		goto out;
7947
7948	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7949	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7950
7951	return (0);
7952out:
7953	zfs_fini();
7954	spa_fini();
7955	zvol_fini();
7956
7957	return (error);
7958}
7959
7960void
7961zfs_kmod_fini(void)
7962{
7963	zfsdev_state_t *zs, *zsnext = NULL;
7964
7965	zfsdev_detach();
7966
7967	mutex_destroy(&zfsdev_state_lock);
7968
7969	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zsnext) {
7970		zsnext = zs->zs_next;
7971		if (zs->zs_onexit)
7972			zfs_onexit_destroy(zs->zs_onexit);
7973		if (zs->zs_zevent)
7974			zfs_zevent_destroy(zs->zs_zevent);
7975		if (zs != &zfsdev_state_listhead)
7976			kmem_free(zs, sizeof (zfsdev_state_t));
7977	}
7978
7979	zfs_ereport_taskq_fini();	/* run before zfs_fini() on Linux */
7980	zfs_fini();
7981	spa_fini();
7982	zvol_fini();
7983
7984	tsd_destroy(&rrw_tsd_key);
7985	tsd_destroy(&zfs_allow_log_key);
7986}
7987
/*
 * Read-write (ZMOD_RW) 64-bit module parameter bounding the size of the
 * source nvlist accepted by an ioctl.
 * NOTE(review): presumably this is the value behind
 * zfs_max_nvlist_src_size_os(), which zfsdev_ioctl_common() compares
 * against zc_nvlist_src_size -- confirm against the OS-specific code.
 */
ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
	"Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");

/*
 * Read-write (ZMOD_RW) 64-bit module parameter; presumably exposes
 * zfs_history_output_max, the threshold above which zfsdev_ioctl_common()
 * logs only the size of an ioctl's output nvlist instead of its contents.
 */
ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
	"Maximum size in bytes of ZFS ioctl output that will be logged");
7993