zfs_ioctl.c revision 11876:5fce03ad05c6
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/param.h>
28#include <sys/errno.h>
29#include <sys/uio.h>
30#include <sys/buf.h>
31#include <sys/modctl.h>
32#include <sys/open.h>
33#include <sys/file.h>
34#include <sys/kmem.h>
35#include <sys/conf.h>
36#include <sys/cmn_err.h>
37#include <sys/stat.h>
38#include <sys/zfs_ioctl.h>
39#include <sys/zfs_vfsops.h>
40#include <sys/zfs_znode.h>
41#include <sys/zap.h>
42#include <sys/spa.h>
43#include <sys/spa_impl.h>
44#include <sys/vdev.h>
45#include <sys/priv_impl.h>
46#include <sys/dmu.h>
47#include <sys/dsl_dir.h>
48#include <sys/dsl_dataset.h>
49#include <sys/dsl_prop.h>
50#include <sys/dsl_deleg.h>
51#include <sys/dmu_objset.h>
52#include <sys/ddi.h>
53#include <sys/sunddi.h>
54#include <sys/sunldi.h>
55#include <sys/policy.h>
56#include <sys/zone.h>
57#include <sys/nvpair.h>
58#include <sys/pathname.h>
59#include <sys/mount.h>
60#include <sys/sdt.h>
61#include <sys/fs/zfs.h>
62#include <sys/zfs_ctldir.h>
63#include <sys/zfs_dir.h>
64#include <sys/zvol.h>
65#include <sharefs/share.h>
66#include <sys/dmu_objset.h>
67
68#include "zfs_namecheck.h"
69#include "zfs_prop.h"
70#include "zfs_deleg.h"
71
72extern struct modlfs zfs_modlfs;
73
74extern void zfs_init(void);
75extern void zfs_fini(void);
76
77ldi_ident_t zfs_li = NULL;
78dev_info_t *zfs_dip;
79
80typedef int zfs_ioc_func_t(zfs_cmd_t *);
81typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
82
83typedef enum {
84	NO_NAME,
85	POOL_NAME,
86	DATASET_NAME
87} zfs_ioc_namecheck_t;
88
89typedef struct zfs_ioc_vec {
90	zfs_ioc_func_t		*zvec_func;
91	zfs_secpolicy_func_t	*zvec_secpolicy;
92	zfs_ioc_namecheck_t	zvec_namecheck;
93	boolean_t		zvec_his_log;
94	boolean_t		zvec_pool_check;
95} zfs_ioc_vec_t;
96
97/* This array is indexed by zfs_userquota_prop_t */
98static const char *userquota_perms[] = {
99	ZFS_DELEG_PERM_USERUSED,
100	ZFS_DELEG_PERM_USERQUOTA,
101	ZFS_DELEG_PERM_GROUPUSED,
102	ZFS_DELEG_PERM_GROUPQUOTA,
103};
104
105static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
106static int zfs_check_settable(const char *name, nvpair_t *property,
107    cred_t *cr);
108static int zfs_check_clearable(char *dataset, nvlist_t *props,
109    nvlist_t **errors);
110static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
111    boolean_t *);
112int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
113
114/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
115void
116__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
117{
118	const char *newfile;
119	char buf[256];
120	va_list adx;
121
122	/*
123	 * Get rid of annoying "../common/" prefix to filename.
124	 */
125	newfile = strrchr(file, '/');
126	if (newfile != NULL) {
127		newfile = newfile + 1; /* Get rid of leading / */
128	} else {
129		newfile = file;
130	}
131
132	va_start(adx, fmt);
133	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
134	va_end(adx);
135
136	/*
137	 * To get this data, use the zfs-dprintf probe as so:
138	 * dtrace -q -n 'zfs-dprintf \
139	 *	/stringof(arg0) == "dbuf.c"/ \
140	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
141	 * arg0 = file name
142	 * arg1 = function name
143	 * arg2 = line number
144	 * arg3 = message
145	 */
146	DTRACE_PROBE4(zfs__dprintf,
147	    char *, newfile, char *, func, int, line, char *, buf);
148}
149
150static void
151history_str_free(char *buf)
152{
153	kmem_free(buf, HIS_MAX_RECORD_LEN);
154}
155
156static char *
157history_str_get(zfs_cmd_t *zc)
158{
159	char *buf;
160
161	if (zc->zc_history == NULL)
162		return (NULL);
163
164	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
165	if (copyinstr((void *)(uintptr_t)zc->zc_history,
166	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
167		history_str_free(buf);
168		return (NULL);
169	}
170
171	buf[HIS_MAX_RECORD_LEN -1] = '\0';
172
173	return (buf);
174}
175
176/*
177 * Check to see if the named dataset is currently defined as bootable
178 */
179static boolean_t
180zfs_is_bootfs(const char *name)
181{
182	objset_t *os;
183
184	if (dmu_objset_hold(name, FTAG, &os) == 0) {
185		boolean_t ret;
186		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
187		dmu_objset_rele(os, FTAG);
188		return (ret);
189	}
190	return (B_FALSE);
191}
192
193/*
194 * zfs_earlier_version
195 *
196 *	Return non-zero if the spa version is less than requested version.
197 */
198static int
199zfs_earlier_version(const char *name, int version)
200{
201	spa_t *spa;
202
203	if (spa_open(name, &spa, FTAG) == 0) {
204		if (spa_version(spa) < version) {
205			spa_close(spa, FTAG);
206			return (1);
207		}
208		spa_close(spa, FTAG);
209	}
210	return (0);
211}
212
213/*
214 * zpl_earlier_version
215 *
216 * Return TRUE if the ZPL version is less than requested version.
217 */
218static boolean_t
219zpl_earlier_version(const char *name, int version)
220{
221	objset_t *os;
222	boolean_t rc = B_TRUE;
223
224	if (dmu_objset_hold(name, FTAG, &os) == 0) {
225		uint64_t zplversion;
226
227		if (dmu_objset_type(os) != DMU_OST_ZFS) {
228			dmu_objset_rele(os, FTAG);
229			return (B_TRUE);
230		}
231		/* XXX reading from non-owned objset */
232		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
233			rc = zplversion < version;
234		dmu_objset_rele(os, FTAG);
235	}
236	return (rc);
237}
238
239static void
240zfs_log_history(zfs_cmd_t *zc)
241{
242	spa_t *spa;
243	char *buf;
244
245	if ((buf = history_str_get(zc)) == NULL)
246		return;
247
248	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
249		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
250			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
251		spa_close(spa, FTAG);
252	}
253	history_str_free(buf);
254}
255
256/*
257 * Policy for top-level read operations (list pools).  Requires no privileges,
258 * and can be used in the local zone, as there is no associated dataset.
259 */
260/* ARGSUSED */
261static int
262zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
263{
264	return (0);
265}
266
267/*
268 * Policy for dataset read operations (list children, get statistics).  Requires
269 * no privileges, but must be visible in the local zone.
270 */
271/* ARGSUSED */
272static int
273zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
274{
275	if (INGLOBALZONE(curproc) ||
276	    zone_dataset_visible(zc->zc_name, NULL))
277		return (0);
278
279	return (ENOENT);
280}
281
282static int
283zfs_dozonecheck(const char *dataset, cred_t *cr)
284{
285	uint64_t zoned;
286	int writable = 1;
287
288	/*
289	 * The dataset must be visible by this zone -- check this first
290	 * so they don't see EPERM on something they shouldn't know about.
291	 */
292	if (!INGLOBALZONE(curproc) &&
293	    !zone_dataset_visible(dataset, &writable))
294		return (ENOENT);
295
296	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
297		return (ENOENT);
298
299	if (INGLOBALZONE(curproc)) {
300		/*
301		 * If the fs is zoned, only root can access it from the
302		 * global zone.
303		 */
304		if (secpolicy_zfs(cr) && zoned)
305			return (EPERM);
306	} else {
307		/*
308		 * If we are in a local zone, the 'zoned' property must be set.
309		 */
310		if (!zoned)
311			return (EPERM);
312
313		/* must be writable by this zone */
314		if (!writable)
315			return (EPERM);
316	}
317	return (0);
318}
319
320int
321zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
322{
323	int error;
324
325	error = zfs_dozonecheck(name, cr);
326	if (error == 0) {
327		error = secpolicy_zfs(cr);
328		if (error)
329			error = dsl_deleg_access(name, perm, cr);
330	}
331	return (error);
332}
333
334/*
335 * Policy for setting the security label property.
336 *
337 * Returns 0 for success, non-zero for access and other errors.
338 */
339static int
340zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
341{
342	char		ds_hexsl[MAXNAMELEN];
343	bslabel_t	ds_sl, new_sl;
344	boolean_t	new_default = FALSE;
345	uint64_t	zoned;
346	int		needed_priv = -1;
347	int		error;
348
349	/* First get the existing dataset label. */
350	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
351	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
352	if (error)
353		return (EPERM);
354
355	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
356		new_default = TRUE;
357
358	/* The label must be translatable */
359	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
360		return (EINVAL);
361
362	/*
363	 * In a non-global zone, disallow attempts to set a label that
364	 * doesn't match that of the zone; otherwise no other checks
365	 * are needed.
366	 */
367	if (!INGLOBALZONE(curproc)) {
368		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
369			return (EPERM);
370		return (0);
371	}
372
373	/*
374	 * For global-zone datasets (i.e., those whose zoned property is
375	 * "off", verify that the specified new label is valid for the
376	 * global zone.
377	 */
378	if (dsl_prop_get_integer(name,
379	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
380		return (EPERM);
381	if (!zoned) {
382		if (zfs_check_global_label(name, strval) != 0)
383			return (EPERM);
384	}
385
386	/*
387	 * If the existing dataset label is nondefault, check if the
388	 * dataset is mounted (label cannot be changed while mounted).
389	 * Get the zfsvfs; if there isn't one, then the dataset isn't
390	 * mounted (or isn't a dataset, doesn't exist, ...).
391	 */
392	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
393		objset_t *os;
394		static char *setsl_tag = "setsl_tag";
395
396		/*
397		 * Try to own the dataset; abort if there is any error,
398		 * (e.g., already mounted, in use, or other error).
399		 */
400		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
401		    setsl_tag, &os);
402		if (error)
403			return (EPERM);
404
405		dmu_objset_disown(os, setsl_tag);
406
407		if (new_default) {
408			needed_priv = PRIV_FILE_DOWNGRADE_SL;
409			goto out_check;
410		}
411
412		if (hexstr_to_label(strval, &new_sl) != 0)
413			return (EPERM);
414
415		if (blstrictdom(&ds_sl, &new_sl))
416			needed_priv = PRIV_FILE_DOWNGRADE_SL;
417		else if (blstrictdom(&new_sl, &ds_sl))
418			needed_priv = PRIV_FILE_UPGRADE_SL;
419	} else {
420		/* dataset currently has a default label */
421		if (!new_default)
422			needed_priv = PRIV_FILE_UPGRADE_SL;
423	}
424
425out_check:
426	if (needed_priv != -1)
427		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
428	return (0);
429}
430
431static int
432zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
433    cred_t *cr)
434{
435	char *strval;
436
437	/*
438	 * Check permissions for special properties.
439	 */
440	switch (prop) {
441	case ZFS_PROP_ZONED:
442		/*
443		 * Disallow setting of 'zoned' from within a local zone.
444		 */
445		if (!INGLOBALZONE(curproc))
446			return (EPERM);
447		break;
448
449	case ZFS_PROP_QUOTA:
450		if (!INGLOBALZONE(curproc)) {
451			uint64_t zoned;
452			char setpoint[MAXNAMELEN];
453			/*
454			 * Unprivileged users are allowed to modify the
455			 * quota on things *under* (ie. contained by)
456			 * the thing they own.
457			 */
458			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
459			    setpoint))
460				return (EPERM);
461			if (!zoned || strlen(dsname) <= strlen(setpoint))
462				return (EPERM);
463		}
464		break;
465
466	case ZFS_PROP_MLSLABEL:
467		if (!is_system_labeled())
468			return (EPERM);
469
470		if (nvpair_value_string(propval, &strval) == 0) {
471			int err;
472
473			err = zfs_set_slabel_policy(dsname, strval, CRED());
474			if (err != 0)
475				return (err);
476		}
477		break;
478	}
479
480	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
481}
482
483int
484zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
485{
486	int error;
487
488	error = zfs_dozonecheck(zc->zc_name, cr);
489	if (error)
490		return (error);
491
492	/*
493	 * permission to set permissions will be evaluated later in
494	 * dsl_deleg_can_allow()
495	 */
496	return (0);
497}
498
499int
500zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
501{
502	return (zfs_secpolicy_write_perms(zc->zc_name,
503	    ZFS_DELEG_PERM_ROLLBACK, cr));
504}
505
506int
507zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
508{
509	return (zfs_secpolicy_write_perms(zc->zc_name,
510	    ZFS_DELEG_PERM_SEND, cr));
511}
512
513static int
514zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
515{
516	vnode_t *vp;
517	int error;
518
519	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
520	    NO_FOLLOW, NULL, &vp)) != 0)
521		return (error);
522
523	/* Now make sure mntpnt and dataset are ZFS */
524
525	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
526	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
527	    zc->zc_name) != 0)) {
528		VN_RELE(vp);
529		return (EPERM);
530	}
531
532	VN_RELE(vp);
533	return (dsl_deleg_access(zc->zc_name,
534	    ZFS_DELEG_PERM_SHARE, cr));
535}
536
537int
538zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
539{
540	if (!INGLOBALZONE(curproc))
541		return (EPERM);
542
543	if (secpolicy_nfs(cr) == 0) {
544		return (0);
545	} else {
546		return (zfs_secpolicy_deleg_share(zc, cr));
547	}
548}
549
550int
551zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
552{
553	if (!INGLOBALZONE(curproc))
554		return (EPERM);
555
556	if (secpolicy_smb(cr) == 0) {
557		return (0);
558	} else {
559		return (zfs_secpolicy_deleg_share(zc, cr));
560	}
561}
562
/*
 * Compute the parent of a dataset or snapshot name.
 *
 *	datasetname	name to take the parent of
 *	parent		output buffer for the parent name
 *	parentsize	size of 'parent' in bytes
 *
 * The "@bla" suffix is removed if the name contains '@'; otherwise the
 * trailing "/bla" component is removed.
 *
 * Returns 0 on success, ENOENT if the name has neither '@' nor '/', and
 * ENAMETOOLONG if the name (with its terminator) does not fit in 'parent'.
 * 'parent' is always left NUL-terminated when parentsize > 0.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (ENAMETOOLONG);

	/*
	 * strncpy() zero-fills the remainder of the buffer when the source
	 * is shorter than the buffer, and writes no terminator at all when
	 * it is not.  A non-NUL final byte therefore means the name was
	 * truncated.  Refuse it rather than scanning an unterminated
	 * buffer or checking permissions against a clipped name.
	 */
	(void) strncpy(parent, datasetname, (size_t)parentsize);
	if (parent[parentsize - 1] != '\0') {
		parent[parentsize - 1] = '\0';
		return (ENAMETOOLONG);
	}

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
584
585int
586zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
587{
588	int error;
589
590	if ((error = zfs_secpolicy_write_perms(name,
591	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
592		return (error);
593
594	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
595}
596
597static int
598zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
599{
600	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
601}
602
603/*
604 * Destroying snapshots with delegated permissions requires
605 * descendent mount and destroy permissions.
606 * Reassemble the full filesystem@snap name so dsl_deleg_access()
607 * can do the correct permission check.
608 *
609 * Since this routine is used when doing a recursive destroy of snapshots
610 * and destroying snapshots requires descendent permissions, a successfull
611 * check of the top level snapshot applies to snapshots of all descendent
612 * datasets as well.
613 */
614static int
615zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
616{
617	int error;
618	char *dsname;
619
620	dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
621
622	error = zfs_secpolicy_destroy_perms(dsname, cr);
623
624	strfree(dsname);
625	return (error);
626}
627
628int
629zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
630{
631	char	parentname[MAXNAMELEN];
632	int	error;
633
634	if ((error = zfs_secpolicy_write_perms(from,
635	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
636		return (error);
637
638	if ((error = zfs_secpolicy_write_perms(from,
639	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
640		return (error);
641
642	if ((error = zfs_get_parent(to, parentname,
643	    sizeof (parentname))) != 0)
644		return (error);
645
646	if ((error = zfs_secpolicy_write_perms(parentname,
647	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
648		return (error);
649
650	if ((error = zfs_secpolicy_write_perms(parentname,
651	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
652		return (error);
653
654	return (error);
655}
656
657static int
658zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
659{
660	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
661}
662
663static int
664zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
665{
666	char	parentname[MAXNAMELEN];
667	objset_t *clone;
668	int error;
669
670	error = zfs_secpolicy_write_perms(zc->zc_name,
671	    ZFS_DELEG_PERM_PROMOTE, cr);
672	if (error)
673		return (error);
674
675	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
676
677	if (error == 0) {
678		dsl_dataset_t *pclone = NULL;
679		dsl_dir_t *dd;
680		dd = clone->os_dsl_dataset->ds_dir;
681
682		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
683		error = dsl_dataset_hold_obj(dd->dd_pool,
684		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
685		rw_exit(&dd->dd_pool->dp_config_rwlock);
686		if (error) {
687			dmu_objset_rele(clone, FTAG);
688			return (error);
689		}
690
691		error = zfs_secpolicy_write_perms(zc->zc_name,
692		    ZFS_DELEG_PERM_MOUNT, cr);
693
694		dsl_dataset_name(pclone, parentname);
695		dmu_objset_rele(clone, FTAG);
696		dsl_dataset_rele(pclone, FTAG);
697		if (error == 0)
698			error = zfs_secpolicy_write_perms(parentname,
699			    ZFS_DELEG_PERM_PROMOTE, cr);
700	}
701	return (error);
702}
703
704static int
705zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
706{
707	int error;
708
709	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
710	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
711		return (error);
712
713	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
714	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
715		return (error);
716
717	return (zfs_secpolicy_write_perms(zc->zc_name,
718	    ZFS_DELEG_PERM_CREATE, cr));
719}
720
721int
722zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
723{
724	return (zfs_secpolicy_write_perms(name,
725	    ZFS_DELEG_PERM_SNAPSHOT, cr));
726}
727
728static int
729zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
730{
731
732	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
733}
734
735static int
736zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
737{
738	char	parentname[MAXNAMELEN];
739	int	error;
740
741	if ((error = zfs_get_parent(zc->zc_name, parentname,
742	    sizeof (parentname))) != 0)
743		return (error);
744
745	if (zc->zc_value[0] != '\0') {
746		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
747		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
748			return (error);
749	}
750
751	if ((error = zfs_secpolicy_write_perms(parentname,
752	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
753		return (error);
754
755	error = zfs_secpolicy_write_perms(parentname,
756	    ZFS_DELEG_PERM_MOUNT, cr);
757
758	return (error);
759}
760
761static int
762zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
763{
764	int error;
765
766	error = secpolicy_fs_unmount(cr, NULL);
767	if (error) {
768		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
769	}
770	return (error);
771}
772
773/*
774 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
775 * SYS_CONFIG privilege, which is not available in a local zone.
776 */
777/* ARGSUSED */
778static int
779zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
780{
781	if (secpolicy_sys_config(cr, B_FALSE) != 0)
782		return (EPERM);
783
784	return (0);
785}
786
787/*
788 * Policy for fault injection.  Requires all privileges.
789 */
790/* ARGSUSED */
791static int
792zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
793{
794	return (secpolicy_zinject(cr));
795}
796
797static int
798zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
799{
800	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
801
802	if (prop == ZPROP_INVAL) {
803		if (!zfs_prop_user(zc->zc_value))
804			return (EINVAL);
805		return (zfs_secpolicy_write_perms(zc->zc_name,
806		    ZFS_DELEG_PERM_USERPROP, cr));
807	} else {
808		return (zfs_secpolicy_setprop(zc->zc_name, prop,
809		    NULL, cr));
810	}
811}
812
813static int
814zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
815{
816	int err = zfs_secpolicy_read(zc, cr);
817	if (err)
818		return (err);
819
820	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
821		return (EINVAL);
822
823	if (zc->zc_value[0] == 0) {
824		/*
825		 * They are asking about a posix uid/gid.  If it's
826		 * themself, allow it.
827		 */
828		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
829		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
830			if (zc->zc_guid == crgetuid(cr))
831				return (0);
832		} else {
833			if (groupmember(zc->zc_guid, cr))
834				return (0);
835		}
836	}
837
838	return (zfs_secpolicy_write_perms(zc->zc_name,
839	    userquota_perms[zc->zc_objset_type], cr));
840}
841
842static int
843zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
844{
845	int err = zfs_secpolicy_read(zc, cr);
846	if (err)
847		return (err);
848
849	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
850		return (EINVAL);
851
852	return (zfs_secpolicy_write_perms(zc->zc_name,
853	    userquota_perms[zc->zc_objset_type], cr));
854}
855
856static int
857zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
858{
859	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
860	    NULL, cr));
861}
862
863static int
864zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
865{
866	return (zfs_secpolicy_write_perms(zc->zc_name,
867	    ZFS_DELEG_PERM_HOLD, cr));
868}
869
870static int
871zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
872{
873	return (zfs_secpolicy_write_perms(zc->zc_name,
874	    ZFS_DELEG_PERM_RELEASE, cr));
875}
876
877/*
878 * Returns the nvlist as specified by the user in the zfs_cmd_t.
879 */
880static int
881get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
882{
883	char *packed;
884	int error;
885	nvlist_t *list = NULL;
886
887	/*
888	 * Read in and unpack the user-supplied nvlist.
889	 */
890	if (size == 0)
891		return (EINVAL);
892
893	packed = kmem_alloc(size, KM_SLEEP);
894
895	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
896	    iflag)) != 0) {
897		kmem_free(packed, size);
898		return (error);
899	}
900
901	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
902		kmem_free(packed, size);
903		return (error);
904	}
905
906	kmem_free(packed, size);
907
908	*nvp = list;
909	return (0);
910}
911
912static int
913fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
914{
915	size_t size;
916
917	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
918
919	if (size > zc->zc_nvlist_dst_size) {
920		nvpair_t *more_errors;
921		int n = 0;
922
923		if (zc->zc_nvlist_dst_size < 1024)
924			return (ENOMEM);
925
926		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
927		more_errors = nvlist_prev_nvpair(*errors, NULL);
928
929		do {
930			nvpair_t *pair = nvlist_prev_nvpair(*errors,
931			    more_errors);
932			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
933			n++;
934			VERIFY(nvlist_size(*errors, &size,
935			    NV_ENCODE_NATIVE) == 0);
936		} while (size > zc->zc_nvlist_dst_size);
937
938		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
939		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
940		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
941		ASSERT(size <= zc->zc_nvlist_dst_size);
942	}
943
944	return (0);
945}
946
947static int
948put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
949{
950	char *packed = NULL;
951	int error = 0;
952	size_t size;
953
954	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
955
956	if (size > zc->zc_nvlist_dst_size) {
957		error = ENOMEM;
958	} else {
959		packed = kmem_alloc(size, KM_SLEEP);
960		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
961		    KM_SLEEP) == 0);
962		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
963		    size, zc->zc_iflags) != 0)
964			error = EFAULT;
965		kmem_free(packed, size);
966	}
967
968	zc->zc_nvlist_dst_size = size;
969	return (error);
970}
971
972static int
973getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
974{
975	objset_t *os;
976	int error;
977
978	error = dmu_objset_hold(dsname, FTAG, &os);
979	if (error)
980		return (error);
981	if (dmu_objset_type(os) != DMU_OST_ZFS) {
982		dmu_objset_rele(os, FTAG);
983		return (EINVAL);
984	}
985
986	mutex_enter(&os->os_user_ptr_lock);
987	*zfvp = dmu_objset_get_user(os);
988	if (*zfvp) {
989		VFS_HOLD((*zfvp)->z_vfs);
990	} else {
991		error = ESRCH;
992	}
993	mutex_exit(&os->os_user_ptr_lock);
994	dmu_objset_rele(os, FTAG);
995	return (error);
996}
997
998/*
999 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1000 * case its z_vfs will be NULL, and it will be opened as the owner.
1001 */
1002static int
1003zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp)
1004{
1005	int error = 0;
1006
1007	if (getzfsvfs(name, zfvp) != 0)
1008		error = zfsvfs_create(name, zfvp);
1009	if (error == 0) {
1010		rrw_enter(&(*zfvp)->z_teardown_lock, RW_READER, tag);
1011		if ((*zfvp)->z_unmounted) {
1012			/*
1013			 * XXX we could probably try again, since the unmounting
1014			 * thread should be just about to disassociate the
1015			 * objset from the zfsvfs.
1016			 */
1017			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1018			return (EBUSY);
1019		}
1020	}
1021	return (error);
1022}
1023
1024static void
1025zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1026{
1027	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1028
1029	if (zfsvfs->z_vfs) {
1030		VFS_RELE(zfsvfs->z_vfs);
1031	} else {
1032		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1033		zfsvfs_free(zfsvfs);
1034	}
1035}
1036
1037static int
1038zfs_ioc_pool_create(zfs_cmd_t *zc)
1039{
1040	int error;
1041	nvlist_t *config, *props = NULL;
1042	nvlist_t *rootprops = NULL;
1043	nvlist_t *zplprops = NULL;
1044	char *buf;
1045
1046	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1047	    zc->zc_iflags, &config))
1048		return (error);
1049
1050	if (zc->zc_nvlist_src_size != 0 && (error =
1051	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1052	    zc->zc_iflags, &props))) {
1053		nvlist_free(config);
1054		return (error);
1055	}
1056
1057	if (props) {
1058		nvlist_t *nvl = NULL;
1059		uint64_t version = SPA_VERSION;
1060
1061		(void) nvlist_lookup_uint64(props,
1062		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1063		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
1064			error = EINVAL;
1065			goto pool_props_bad;
1066		}
1067		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1068		if (nvl) {
1069			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1070			if (error != 0) {
1071				nvlist_free(config);
1072				nvlist_free(props);
1073				return (error);
1074			}
1075			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1076		}
1077		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1078		error = zfs_fill_zplprops_root(version, rootprops,
1079		    zplprops, NULL);
1080		if (error)
1081			goto pool_props_bad;
1082	}
1083
1084	buf = history_str_get(zc);
1085
1086	error = spa_create(zc->zc_name, config, props, buf, zplprops);
1087
1088	/*
1089	 * Set the remaining root properties
1090	 */
1091	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1092	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1093		(void) spa_destroy(zc->zc_name);
1094
1095	if (buf != NULL)
1096		history_str_free(buf);
1097
1098pool_props_bad:
1099	nvlist_free(rootprops);
1100	nvlist_free(zplprops);
1101	nvlist_free(config);
1102	nvlist_free(props);
1103
1104	return (error);
1105}
1106
1107static int
1108zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1109{
1110	int error;
1111	zfs_log_history(zc);
1112	error = spa_destroy(zc->zc_name);
1113	if (error == 0)
1114		zvol_remove_minors(zc->zc_name);
1115	return (error);
1116}
1117
1118static int
1119zfs_ioc_pool_import(zfs_cmd_t *zc)
1120{
1121	nvlist_t *config, *props = NULL;
1122	uint64_t guid;
1123	int error;
1124
1125	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1126	    zc->zc_iflags, &config)) != 0)
1127		return (error);
1128
1129	if (zc->zc_nvlist_src_size != 0 && (error =
1130	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1131	    zc->zc_iflags, &props))) {
1132		nvlist_free(config);
1133		return (error);
1134	}
1135
1136	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1137	    guid != zc->zc_guid)
1138		error = EINVAL;
1139	else if (zc->zc_cookie)
1140		error = spa_import_verbatim(zc->zc_name, config, props);
1141	else
1142		error = spa_import(zc->zc_name, config, props);
1143
1144	if (zc->zc_nvlist_dst != 0)
1145		(void) put_nvlist(zc, config);
1146
1147	nvlist_free(config);
1148
1149	if (props)
1150		nvlist_free(props);
1151
1152	return (error);
1153}
1154
1155static int
1156zfs_ioc_pool_export(zfs_cmd_t *zc)
1157{
1158	int error;
1159	boolean_t force = (boolean_t)zc->zc_cookie;
1160	boolean_t hardforce = (boolean_t)zc->zc_guid;
1161
1162	zfs_log_history(zc);
1163	error = spa_export(zc->zc_name, NULL, force, hardforce);
1164	if (error == 0)
1165		zvol_remove_minors(zc->zc_name);
1166	return (error);
1167}
1168
1169static int
1170zfs_ioc_pool_configs(zfs_cmd_t *zc)
1171{
1172	nvlist_t *configs;
1173	int error;
1174
1175	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1176		return (EEXIST);
1177
1178	error = put_nvlist(zc, configs);
1179
1180	nvlist_free(configs);
1181
1182	return (error);
1183}
1184
1185static int
1186zfs_ioc_pool_stats(zfs_cmd_t *zc)
1187{
1188	nvlist_t *config;
1189	int error;
1190	int ret = 0;
1191
1192	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1193	    sizeof (zc->zc_value));
1194
1195	if (config != NULL) {
1196		ret = put_nvlist(zc, config);
1197		nvlist_free(config);
1198
1199		/*
1200		 * The config may be present even if 'error' is non-zero.
1201		 * In this case we return success, and preserve the real errno
1202		 * in 'zc_cookie'.
1203		 */
1204		zc->zc_cookie = error;
1205	} else {
1206		ret = error;
1207	}
1208
1209	return (ret);
1210}
1211
1212/*
1213 * Try to import the given pool, returning pool stats as appropriate so that
1214 * user land knows which devices are available and overall pool health.
1215 */
1216static int
1217zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1218{
1219	nvlist_t *tryconfig, *config;
1220	int error;
1221
1222	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1223	    zc->zc_iflags, &tryconfig)) != 0)
1224		return (error);
1225
1226	config = spa_tryimport(tryconfig);
1227
1228	nvlist_free(tryconfig);
1229
1230	if (config == NULL)
1231		return (EINVAL);
1232
1233	error = put_nvlist(zc, config);
1234	nvlist_free(config);
1235
1236	return (error);
1237}
1238
1239static int
1240zfs_ioc_pool_scrub(zfs_cmd_t *zc)
1241{
1242	spa_t *spa;
1243	int error;
1244
1245	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1246		return (error);
1247
1248	error = spa_scrub(spa, zc->zc_cookie);
1249
1250	spa_close(spa, FTAG);
1251
1252	return (error);
1253}
1254
1255static int
1256zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1257{
1258	spa_t *spa;
1259	int error;
1260
1261	error = spa_open(zc->zc_name, &spa, FTAG);
1262	if (error == 0) {
1263		spa_freeze(spa);
1264		spa_close(spa, FTAG);
1265	}
1266	return (error);
1267}
1268
1269static int
1270zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1271{
1272	spa_t *spa;
1273	int error;
1274
1275	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1276		return (error);
1277
1278	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1279		spa_close(spa, FTAG);
1280		return (EINVAL);
1281	}
1282
1283	spa_upgrade(spa, zc->zc_cookie);
1284	spa_close(spa, FTAG);
1285
1286	return (error);
1287}
1288
1289static int
1290zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1291{
1292	spa_t *spa;
1293	char *hist_buf;
1294	uint64_t size;
1295	int error;
1296
1297	if ((size = zc->zc_history_len) == 0)
1298		return (EINVAL);
1299
1300	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1301		return (error);
1302
1303	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1304		spa_close(spa, FTAG);
1305		return (ENOTSUP);
1306	}
1307
1308	hist_buf = kmem_alloc(size, KM_SLEEP);
1309	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1310	    &zc->zc_history_len, hist_buf)) == 0) {
1311		error = ddi_copyout(hist_buf,
1312		    (void *)(uintptr_t)zc->zc_history,
1313		    zc->zc_history_len, zc->zc_iflags);
1314	}
1315
1316	spa_close(spa, FTAG);
1317	kmem_free(hist_buf, size);
1318	return (error);
1319}
1320
1321static int
1322zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1323{
1324	int error;
1325
1326	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1327		return (error);
1328
1329	return (0);
1330}
1331
1332/*
1333 * inputs:
1334 * zc_name		name of filesystem
1335 * zc_obj		object to find
1336 *
1337 * outputs:
1338 * zc_value		name of object
1339 */
1340static int
1341zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1342{
1343	objset_t *os;
1344	int error;
1345
1346	/* XXX reading from objset not owned */
1347	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1348		return (error);
1349	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1350		dmu_objset_rele(os, FTAG);
1351		return (EINVAL);
1352	}
1353	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1354	    sizeof (zc->zc_value));
1355	dmu_objset_rele(os, FTAG);
1356
1357	return (error);
1358}
1359
1360static int
1361zfs_ioc_vdev_add(zfs_cmd_t *zc)
1362{
1363	spa_t *spa;
1364	int error;
1365	nvlist_t *config, **l2cache, **spares;
1366	uint_t nl2cache = 0, nspares = 0;
1367
1368	error = spa_open(zc->zc_name, &spa, FTAG);
1369	if (error != 0)
1370		return (error);
1371
1372	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1373	    zc->zc_iflags, &config);
1374	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1375	    &l2cache, &nl2cache);
1376
1377	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1378	    &spares, &nspares);
1379
1380	/*
1381	 * A root pool with concatenated devices is not supported.
1382	 * Thus, can not add a device to a root pool.
1383	 *
1384	 * Intent log device can not be added to a rootpool because
1385	 * during mountroot, zil is replayed, a seperated log device
1386	 * can not be accessed during the mountroot time.
1387	 *
1388	 * l2cache and spare devices are ok to be added to a rootpool.
1389	 */
1390	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1391		nvlist_free(config);
1392		spa_close(spa, FTAG);
1393		return (EDOM);
1394	}
1395
1396	if (error == 0) {
1397		error = spa_vdev_add(spa, config);
1398		nvlist_free(config);
1399	}
1400	spa_close(spa, FTAG);
1401	return (error);
1402}
1403
1404static int
1405zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1406{
1407	spa_t *spa;
1408	int error;
1409
1410	error = spa_open(zc->zc_name, &spa, FTAG);
1411	if (error != 0)
1412		return (error);
1413	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1414	spa_close(spa, FTAG);
1415	return (error);
1416}
1417
1418static int
1419zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1420{
1421	spa_t *spa;
1422	int error;
1423	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1424
1425	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1426		return (error);
1427	switch (zc->zc_cookie) {
1428	case VDEV_STATE_ONLINE:
1429		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1430		break;
1431
1432	case VDEV_STATE_OFFLINE:
1433		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1434		break;
1435
1436	case VDEV_STATE_FAULTED:
1437		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1438		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1439			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1440
1441		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1442		break;
1443
1444	case VDEV_STATE_DEGRADED:
1445		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1446		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1447			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1448
1449		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1450		break;
1451
1452	default:
1453		error = EINVAL;
1454	}
1455	zc->zc_cookie = newstate;
1456	spa_close(spa, FTAG);
1457	return (error);
1458}
1459
1460static int
1461zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1462{
1463	spa_t *spa;
1464	int replacing = zc->zc_cookie;
1465	nvlist_t *config;
1466	int error;
1467
1468	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1469		return (error);
1470
1471	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1472	    zc->zc_iflags, &config)) == 0) {
1473		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1474		nvlist_free(config);
1475	}
1476
1477	spa_close(spa, FTAG);
1478	return (error);
1479}
1480
1481static int
1482zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1483{
1484	spa_t *spa;
1485	int error;
1486
1487	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1488		return (error);
1489
1490	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1491
1492	spa_close(spa, FTAG);
1493	return (error);
1494}
1495
1496static int
1497zfs_ioc_vdev_split(zfs_cmd_t *zc)
1498{
1499	spa_t *spa;
1500	nvlist_t *config, *props = NULL;
1501	int error;
1502	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1503
1504	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1505		return (error);
1506
1507	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1508	    zc->zc_iflags, &config)) {
1509		spa_close(spa, FTAG);
1510		return (error);
1511	}
1512
1513	if (zc->zc_nvlist_src_size != 0 && (error =
1514	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1515	    zc->zc_iflags, &props))) {
1516		spa_close(spa, FTAG);
1517		nvlist_free(config);
1518		return (error);
1519	}
1520
1521	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1522
1523	spa_close(spa, FTAG);
1524
1525	nvlist_free(config);
1526	nvlist_free(props);
1527
1528	return (error);
1529}
1530
1531static int
1532zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1533{
1534	spa_t *spa;
1535	char *path = zc->zc_value;
1536	uint64_t guid = zc->zc_guid;
1537	int error;
1538
1539	error = spa_open(zc->zc_name, &spa, FTAG);
1540	if (error != 0)
1541		return (error);
1542
1543	error = spa_vdev_setpath(spa, guid, path);
1544	spa_close(spa, FTAG);
1545	return (error);
1546}
1547
1548static int
1549zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1550{
1551	spa_t *spa;
1552	char *fru = zc->zc_value;
1553	uint64_t guid = zc->zc_guid;
1554	int error;
1555
1556	error = spa_open(zc->zc_name, &spa, FTAG);
1557	if (error != 0)
1558		return (error);
1559
1560	error = spa_vdev_setfru(spa, guid, fru);
1561	spa_close(spa, FTAG);
1562	return (error);
1563}
1564
1565/*
1566 * inputs:
1567 * zc_name		name of filesystem
1568 * zc_nvlist_dst_size	size of buffer for property nvlist
1569 *
1570 * outputs:
1571 * zc_objset_stats	stats
1572 * zc_nvlist_dst	property nvlist
1573 * zc_nvlist_dst_size	size of property nvlist
1574 */
1575static int
1576zfs_ioc_objset_stats(zfs_cmd_t *zc)
1577{
1578	objset_t *os = NULL;
1579	int error;
1580	nvlist_t *nv;
1581
1582	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1583		return (error);
1584
1585	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1586
1587	if (zc->zc_nvlist_dst != 0 &&
1588	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1589		dmu_objset_stats(os, nv);
1590		/*
1591		 * NB: zvol_get_stats() will read the objset contents,
1592		 * which we aren't supposed to do with a
1593		 * DS_MODE_USER hold, because it could be
1594		 * inconsistent.  So this is a bit of a workaround...
1595		 * XXX reading with out owning
1596		 */
1597		if (!zc->zc_objset_stats.dds_inconsistent) {
1598			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1599				VERIFY(zvol_get_stats(os, nv) == 0);
1600		}
1601		error = put_nvlist(zc, nv);
1602		nvlist_free(nv);
1603	}
1604
1605	dmu_objset_rele(os, FTAG);
1606	return (error);
1607}
1608
1609/*
1610 * inputs:
1611 * zc_name		name of filesystem
1612 * zc_nvlist_dst_size	size of buffer for property nvlist
1613 *
1614 * outputs:
1615 * zc_nvlist_dst	received property nvlist
1616 * zc_nvlist_dst_size	size of received property nvlist
1617 *
1618 * Gets received properties (distinct from local properties on or after
1619 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1620 * local property values.
1621 */
1622static int
1623zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1624{
1625	objset_t *os = NULL;
1626	int error;
1627	nvlist_t *nv;
1628
1629	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1630		return (error);
1631
1632	/*
1633	 * Without this check, we would return local property values if the
1634	 * caller has not already received properties on or after
1635	 * SPA_VERSION_RECVD_PROPS.
1636	 */
1637	if (!dsl_prop_get_hasrecvd(os)) {
1638		dmu_objset_rele(os, FTAG);
1639		return (ENOTSUP);
1640	}
1641
1642	if (zc->zc_nvlist_dst != 0 &&
1643	    (error = dsl_prop_get_received(os, &nv)) == 0) {
1644		error = put_nvlist(zc, nv);
1645		nvlist_free(nv);
1646	}
1647
1648	dmu_objset_rele(os, FTAG);
1649	return (error);
1650}
1651
1652static int
1653nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1654{
1655	uint64_t value;
1656	int error;
1657
1658	/*
1659	 * zfs_get_zplprop() will either find a value or give us
1660	 * the default value (if there is one).
1661	 */
1662	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1663		return (error);
1664	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1665	return (0);
1666}
1667
1668/*
1669 * inputs:
1670 * zc_name		name of filesystem
1671 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1672 *
1673 * outputs:
1674 * zc_nvlist_dst	zpl property nvlist
1675 * zc_nvlist_dst_size	size of zpl property nvlist
1676 */
1677static int
1678zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1679{
1680	objset_t *os;
1681	int err;
1682
1683	/* XXX reading without owning */
1684	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1685		return (err);
1686
1687	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1688
1689	/*
1690	 * NB: nvl_add_zplprop() will read the objset contents,
1691	 * which we aren't supposed to do with a DS_MODE_USER
1692	 * hold, because it could be inconsistent.
1693	 */
1694	if (zc->zc_nvlist_dst != NULL &&
1695	    !zc->zc_objset_stats.dds_inconsistent &&
1696	    dmu_objset_type(os) == DMU_OST_ZFS) {
1697		nvlist_t *nv;
1698
1699		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1700		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1701		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1702		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1703		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1704			err = put_nvlist(zc, nv);
1705		nvlist_free(nv);
1706	} else {
1707		err = ENOENT;
1708	}
1709	dmu_objset_rele(os, FTAG);
1710	return (err);
1711}
1712
1713static boolean_t
1714dataset_name_hidden(const char *name)
1715{
1716	/*
1717	 * Skip over datasets that are not visible in this zone,
1718	 * internal datasets (which have a $ in their name), and
1719	 * temporary datasets (which have a % in their name).
1720	 */
1721	if (strchr(name, '$') != NULL)
1722		return (B_TRUE);
1723	if (strchr(name, '%') != NULL)
1724		return (B_TRUE);
1725	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1726		return (B_TRUE);
1727	return (B_FALSE);
1728}
1729
1730/*
1731 * inputs:
1732 * zc_name		name of filesystem
1733 * zc_cookie		zap cursor
1734 * zc_nvlist_dst_size	size of buffer for property nvlist
1735 *
1736 * outputs:
1737 * zc_name		name of next filesystem
1738 * zc_cookie		zap cursor
1739 * zc_objset_stats	stats
1740 * zc_nvlist_dst	property nvlist
1741 * zc_nvlist_dst_size	size of property nvlist
1742 */
1743static int
1744zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1745{
1746	objset_t *os;
1747	int error;
1748	char *p;
1749	size_t orig_len = strlen(zc->zc_name);
1750
1751top:
1752	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1753		if (error == ENOENT)
1754			error = ESRCH;
1755		return (error);
1756	}
1757
1758	p = strrchr(zc->zc_name, '/');
1759	if (p == NULL || p[1] != '\0')
1760		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1761	p = zc->zc_name + strlen(zc->zc_name);
1762
1763	/*
1764	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1765	 * but is not declared void because its called by dmu_objset_find().
1766	 */
1767	if (zc->zc_cookie == 0) {
1768		uint64_t cookie = 0;
1769		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1770
1771		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
1772			(void) dmu_objset_prefetch(p, NULL);
1773	}
1774
1775	do {
1776		error = dmu_dir_list_next(os,
1777		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1778		    NULL, &zc->zc_cookie);
1779		if (error == ENOENT)
1780			error = ESRCH;
1781	} while (error == 0 && dataset_name_hidden(zc->zc_name) &&
1782	    !(zc->zc_iflags & FKIOCTL));
1783	dmu_objset_rele(os, FTAG);
1784
1785	/*
1786	 * If it's an internal dataset (ie. with a '$' in its name),
1787	 * don't try to get stats for it, otherwise we'll return ENOENT.
1788	 */
1789	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
1790		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1791		if (error == ENOENT) {
1792			/* We lost a race with destroy, get the next one. */
1793			zc->zc_name[orig_len] = '\0';
1794			goto top;
1795		}
1796	}
1797	return (error);
1798}
1799
1800/*
1801 * inputs:
1802 * zc_name		name of filesystem
1803 * zc_cookie		zap cursor
1804 * zc_nvlist_dst_size	size of buffer for property nvlist
1805 *
1806 * outputs:
1807 * zc_name		name of next snapshot
1808 * zc_objset_stats	stats
1809 * zc_nvlist_dst	property nvlist
1810 * zc_nvlist_dst_size	size of property nvlist
1811 */
1812static int
1813zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1814{
1815	objset_t *os;
1816	int error;
1817
1818top:
1819	if (zc->zc_cookie == 0)
1820		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
1821		    NULL, DS_FIND_SNAPSHOTS);
1822
1823	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
1824	if (error)
1825		return (error == ENOENT ? ESRCH : error);
1826
1827	/*
1828	 * A dataset name of maximum length cannot have any snapshots,
1829	 * so exit immediately.
1830	 */
1831	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1832		dmu_objset_rele(os, FTAG);
1833		return (ESRCH);
1834	}
1835
1836	error = dmu_snapshot_list_next(os,
1837	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1838	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
1839	dmu_objset_rele(os, FTAG);
1840	if (error == 0) {
1841		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1842		if (error == ENOENT)  {
1843			/* We lost a race with destroy, get the next one. */
1844			*strchr(zc->zc_name, '@') = '\0';
1845			goto top;
1846		}
1847	} else if (error == ENOENT) {
1848		error = ESRCH;
1849	}
1850
1851	/* if we failed, undo the @ that we tacked on to zc_name */
1852	if (error)
1853		*strchr(zc->zc_name, '@') = '\0';
1854	return (error);
1855}
1856
1857static int
1858zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
1859{
1860	const char *propname = nvpair_name(pair);
1861	uint64_t *valary;
1862	unsigned int vallen;
1863	const char *domain;
1864	zfs_userquota_prop_t type;
1865	uint64_t rid;
1866	uint64_t quota;
1867	zfsvfs_t *zfsvfs;
1868	int err;
1869
1870	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1871		nvlist_t *attrs;
1872		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1873		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1874		    &pair) == 0);
1875	}
1876
1877	VERIFY(nvpair_value_uint64_array(pair, &valary, &vallen) == 0);
1878	VERIFY(vallen == 3);
1879	type = valary[0];
1880	rid = valary[1];
1881	quota = valary[2];
1882	/*
1883	 * The propname is encoded as
1884	 * userquota@<rid>-<domain>.
1885	 */
1886	domain = strchr(propname, '-') + 1;
1887
1888	err = zfsvfs_hold(dsname, FTAG, &zfsvfs);
1889	if (err == 0) {
1890		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
1891		zfsvfs_rele(zfsvfs, FTAG);
1892	}
1893
1894	return (err);
1895}
1896
1897/*
1898 * If the named property is one that has a special function to set its value,
1899 * return 0 on success and a positive error code on failure; otherwise if it is
1900 * not one of the special properties handled by this function, return -1.
1901 *
1902 * XXX: It would be better for callers of the properety interface if we handled
1903 * these special cases in dsl_prop.c (in the dsl layer).
1904 */
1905static int
1906zfs_prop_set_special(const char *dsname, zprop_source_t source,
1907    nvpair_t *pair)
1908{
1909	const char *propname = nvpair_name(pair);
1910	zfs_prop_t prop = zfs_name_to_prop(propname);
1911	uint64_t intval;
1912	int err;
1913
1914	if (prop == ZPROP_INVAL) {
1915		if (zfs_prop_userquota(propname))
1916			return (zfs_prop_set_userquota(dsname, pair));
1917		return (-1);
1918	}
1919
1920	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1921		nvlist_t *attrs;
1922		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1923		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1924		    &pair) == 0);
1925	}
1926
1927	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
1928		return (-1);
1929
1930	VERIFY(0 == nvpair_value_uint64(pair, &intval));
1931
1932	switch (prop) {
1933	case ZFS_PROP_QUOTA:
1934		err = dsl_dir_set_quota(dsname, source, intval);
1935		break;
1936	case ZFS_PROP_REFQUOTA:
1937		err = dsl_dataset_set_quota(dsname, source, intval);
1938		break;
1939	case ZFS_PROP_RESERVATION:
1940		err = dsl_dir_set_reservation(dsname, source, intval);
1941		break;
1942	case ZFS_PROP_REFRESERVATION:
1943		err = dsl_dataset_set_reservation(dsname, source, intval);
1944		break;
1945	case ZFS_PROP_VOLSIZE:
1946		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
1947		    intval);
1948		break;
1949	case ZFS_PROP_VERSION:
1950	{
1951		zfsvfs_t *zfsvfs;
1952
1953		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs)) != 0)
1954			break;
1955
1956		err = zfs_set_version(zfsvfs, intval);
1957		zfsvfs_rele(zfsvfs, FTAG);
1958
1959		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
1960			zfs_cmd_t *zc;
1961
1962			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
1963			(void) strcpy(zc->zc_name, dsname);
1964			(void) zfs_ioc_userspace_upgrade(zc);
1965			kmem_free(zc, sizeof (zfs_cmd_t));
1966		}
1967		break;
1968	}
1969
1970	default:
1971		err = -1;
1972	}
1973
1974	return (err);
1975}
1976
/*
 * This function is best effort. If it fails to set any of the given properties,
 * it continues to set as many as it can and returns the first error
 * encountered. If the caller provides a non-NULL errlist, it also gives the
 * complete list of names of all the properties it failed to set along with the
 * corresponding error numbers. The caller is responsible for freeing the
 * returned errlist.
 *
 * If every property is set successfully, zero is returned and the list pointed
 * at by errlist is NULL.
 *
 * dsname	dataset whose properties are being set
 * source	property source passed on to the individual setters
 * nvl		properties to set; each value is either the raw value or an
 *		nvlist wrapping it under ZPROP_VALUE
 * errlist	optional out parameter receiving the per-property errors
 *		(property name -> int32 error)
 *
 * Processing happens in up to three stages:
 *  1. Every pair is type-checked, permission-checked and offered to
 *     zfs_prop_set_special().  Pairs it declines (-1) are collected in
 *     'genericnvl'; pairs it fails on are queued in 'retrynvl'.
 *  2. If anything was queued, the loop runs once more over 'retrynvl';
 *     failures in this second pass are recorded as errors.
 *  3. 'genericnvl' is set in one dsl_props_set() call; if that fails each
 *     pair is set individually with dsl_prop_set().
 */
int
zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
    nvlist_t **errlist)
{
	nvpair_t *pair;
	nvpair_t *propval;
	int rv = 0;
	uint64_t intval;
	char *strval;
	nvlist_t *genericnvl;
	nvlist_t *errors;
	nvlist_t *retrynvl;

	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);

retry:
	/* 'nvl' is the caller's list on the first pass, 'retrynvl' after. */
	pair = NULL;
	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
		const char *propname = nvpair_name(pair);
		zfs_prop_t prop = zfs_name_to_prop(propname);
		int err = 0;

		/* decode the property value */
		propval = pair;
		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
			nvlist_t *attrs;
			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
			VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
			    &propval) == 0);
		}

		/* Validate value type */
		if (prop == ZPROP_INVAL) {
			/*
			 * Non-native names: user properties must be strings,
			 * user/group quotas must be uint64 arrays.  Any other
			 * name passes this step with err still 0.
			 */
			if (zfs_prop_user(propname)) {
				if (nvpair_type(propval) != DATA_TYPE_STRING)
					err = EINVAL;
			} else if (zfs_prop_userquota(propname)) {
				if (nvpair_type(propval) !=
				    DATA_TYPE_UINT64_ARRAY)
					err = EINVAL;
			}
		} else {
			/*
			 * Native properties: the nvpair type must agree with
			 * the property type, and index values must map to a
			 * known string.
			 */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
					err = EINVAL;
			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
				const char *unused;

				VERIFY(nvpair_value_uint64(propval,
				    &intval) == 0);

				switch (zfs_prop_get_type(prop)) {
				case PROP_TYPE_NUMBER:
					break;
				case PROP_TYPE_STRING:
					err = EINVAL;
					break;
				case PROP_TYPE_INDEX:
					if (zfs_prop_index_to_string(prop,
					    intval, &unused) != 0)
						err = EINVAL;
					break;
				default:
					cmn_err(CE_PANIC,
					    "unknown property type");
				}
			} else {
				err = EINVAL;
			}
		}

		/* Validate permissions */
		if (err == 0)
			err = zfs_check_settable(dsname, pair, CRED());

		if (err == 0) {
			err = zfs_prop_set_special(dsname, source, pair);
			if (err == -1) {
				/*
				 * For better performance we build up a list of
				 * properties to set in a single transaction.
				 */
				err = nvlist_add_nvpair(genericnvl, pair);
			} else if (err != 0 && nvl != retrynvl) {
				/*
				 * This may be a spurious error caused by
				 * receiving quota and reservation out of order.
				 * Try again in a second pass.
				 */
				err = nvlist_add_nvpair(retrynvl, pair);
			}
		}

		/*
		 * On the first pass a failed special setter leaves 'err' as
		 * the result of queueing the pair, so its failure is only
		 * recorded here if it fails again on the retry pass.
		 */
		if (err != 0)
			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
	}

	/* Run the loop once more over the queued pairs, but only once. */
	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
		nvl = retrynvl;
		goto retry;
	}

	if (!nvlist_empty(genericnvl) &&
	    dsl_props_set(dsname, source, genericnvl) != 0) {
		/*
		 * If this fails, we still want to set as many properties as we
		 * can, so try setting them individually.
		 */
		pair = NULL;
		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
			const char *propname = nvpair_name(pair);
			int err = 0;

			propval = pair;
			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
				nvlist_t *attrs;
				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
				    &propval) == 0);
			}

			/*
			 * Strings are stored as one-byte units including the
			 * terminating NUL; everything else is treated as a
			 * single 8-byte integer.
			 */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				VERIFY(nvpair_value_string(propval,
				    &strval) == 0);
				err = dsl_prop_set(dsname, propname, source, 1,
				    strlen(strval) + 1, strval);
			} else {
				VERIFY(nvpair_value_uint64(propval,
				    &intval) == 0);
				err = dsl_prop_set(dsname, propname, source, 8,
				    1, &intval);
			}

			if (err != 0) {
				VERIFY(nvlist_add_int32(errors, propname,
				    err) == 0);
			}
		}
	}
	nvlist_free(genericnvl);
	nvlist_free(retrynvl);

	/*
	 * An empty error list means complete success.  Otherwise the return
	 * value is the error stored in the first entry of the list.
	 */
	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
		nvlist_free(errors);
		errors = NULL;
	} else {
		VERIFY(nvpair_value_int32(pair, &rv) == 0);
	}

	/* Hand the list to the caller if wanted, otherwise release it. */
	if (errlist == NULL)
		nvlist_free(errors);
	else
		*errlist = errors;

	return (rv);
}
2146
2147/*
2148 * Check that all the properties are valid user properties.
2149 */
2150static int
2151zfs_check_userprops(char *fsname, nvlist_t *nvl)
2152{
2153	nvpair_t *pair = NULL;
2154	int error = 0;
2155
2156	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2157		const char *propname = nvpair_name(pair);
2158		char *valstr;
2159
2160		if (!zfs_prop_user(propname) ||
2161		    nvpair_type(pair) != DATA_TYPE_STRING)
2162			return (EINVAL);
2163
2164		if (error = zfs_secpolicy_write_perms(fsname,
2165		    ZFS_DELEG_PERM_USERPROP, CRED()))
2166			return (error);
2167
2168		if (strlen(propname) >= ZAP_MAXNAMELEN)
2169			return (ENAMETOOLONG);
2170
2171		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2172		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2173			return (E2BIG);
2174	}
2175	return (0);
2176}
2177
2178static void
2179props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2180{
2181	nvpair_t *pair;
2182
2183	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2184
2185	pair = NULL;
2186	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2187		if (nvlist_exists(skipped, nvpair_name(pair)))
2188			continue;
2189
2190		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2191	}
2192}
2193
2194static int
2195clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2196    nvlist_t *skipped)
2197{
2198	int err = 0;
2199	nvlist_t *cleared_props = NULL;
2200	props_skip(props, skipped, &cleared_props);
2201	if (!nvlist_empty(cleared_props)) {
2202		/*
2203		 * Acts on local properties until the dataset has received
2204		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2205		 */
2206		zprop_source_t flags = (ZPROP_SRC_NONE |
2207		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2208		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2209	}
2210	nvlist_free(cleared_props);
2211	return (err);
2212}
2213
2214/*
2215 * inputs:
2216 * zc_name		name of filesystem
2217 * zc_value		name of property to set
2218 * zc_nvlist_src{_size}	nvlist of properties to apply
2219 * zc_cookie		received properties flag
2220 *
2221 * outputs:
2222 * zc_nvlist_dst{_size} error for each unapplied received property
2223 */
2224static int
2225zfs_ioc_set_prop(zfs_cmd_t *zc)
2226{
2227	nvlist_t *nvl;
2228	boolean_t received = zc->zc_cookie;
2229	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2230	    ZPROP_SRC_LOCAL);
2231	nvlist_t *errors = NULL;
2232	int error;
2233
2234	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2235	    zc->zc_iflags, &nvl)) != 0)
2236		return (error);
2237
2238	if (received) {
2239		nvlist_t *origprops;
2240		objset_t *os;
2241
2242		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2243			if (dsl_prop_get_received(os, &origprops) == 0) {
2244				(void) clear_received_props(os,
2245				    zc->zc_name, origprops, nvl);
2246				nvlist_free(origprops);
2247			}
2248
2249			dsl_prop_set_hasrecvd(os);
2250			dmu_objset_rele(os, FTAG);
2251		}
2252	}
2253
2254	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2255
2256	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2257		(void) put_nvlist(zc, errors);
2258	}
2259
2260	nvlist_free(errors);
2261	nvlist_free(nvl);
2262	return (error);
2263}
2264
2265/*
2266 * inputs:
2267 * zc_name		name of filesystem
2268 * zc_value		name of property to inherit
2269 * zc_cookie		revert to received value if TRUE
2270 *
2271 * outputs:		none
2272 */
2273static int
2274zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2275{
2276	const char *propname = zc->zc_value;
2277	zfs_prop_t prop = zfs_name_to_prop(propname);
2278	boolean_t received = zc->zc_cookie;
2279	zprop_source_t source = (received
2280	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2281	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2282
2283	if (received) {
2284		nvlist_t *dummy;
2285		nvpair_t *pair;
2286		zprop_type_t type;
2287		int err;
2288
2289		/*
2290		 * zfs_prop_set_special() expects properties in the form of an
2291		 * nvpair with type info.
2292		 */
2293		if (prop == ZPROP_INVAL) {
2294			if (!zfs_prop_user(propname))
2295				return (EINVAL);
2296
2297			type = PROP_TYPE_STRING;
2298		} else if (prop == ZFS_PROP_VOLSIZE ||
2299		    prop == ZFS_PROP_VERSION) {
2300			return (EINVAL);
2301		} else {
2302			type = zfs_prop_get_type(prop);
2303		}
2304
2305		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2306
2307		switch (type) {
2308		case PROP_TYPE_STRING:
2309			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2310			break;
2311		case PROP_TYPE_NUMBER:
2312		case PROP_TYPE_INDEX:
2313			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2314			break;
2315		default:
2316			nvlist_free(dummy);
2317			return (EINVAL);
2318		}
2319
2320		pair = nvlist_next_nvpair(dummy, NULL);
2321		err = zfs_prop_set_special(zc->zc_name, source, pair);
2322		nvlist_free(dummy);
2323		if (err != -1)
2324			return (err); /* special property already handled */
2325	} else {
2326		/*
2327		 * Only check this in the non-received case. We want to allow
2328		 * 'inherit -S' to revert non-inheritable properties like quota
2329		 * and reservation to the received or default values even though
2330		 * they are not considered inheritable.
2331		 */
2332		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2333			return (EINVAL);
2334	}
2335
2336	/* the property name has been validated by zfs_secpolicy_inherit() */
2337	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2338}
2339
2340static int
2341zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2342{
2343	nvlist_t *props;
2344	spa_t *spa;
2345	int error;
2346	nvpair_t *pair;
2347
2348	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2349	    zc->zc_iflags, &props))
2350		return (error);
2351
2352	/*
2353	 * If the only property is the configfile, then just do a spa_lookup()
2354	 * to handle the faulted case.
2355	 */
2356	pair = nvlist_next_nvpair(props, NULL);
2357	if (pair != NULL && strcmp(nvpair_name(pair),
2358	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2359	    nvlist_next_nvpair(props, pair) == NULL) {
2360		mutex_enter(&spa_namespace_lock);
2361		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2362			spa_configfile_set(spa, props, B_FALSE);
2363			spa_config_sync(spa, B_FALSE, B_TRUE);
2364		}
2365		mutex_exit(&spa_namespace_lock);
2366		if (spa != NULL) {
2367			nvlist_free(props);
2368			return (0);
2369		}
2370	}
2371
2372	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2373		nvlist_free(props);
2374		return (error);
2375	}
2376
2377	error = spa_prop_set(spa, props);
2378
2379	nvlist_free(props);
2380	spa_close(spa, FTAG);
2381
2382	return (error);
2383}
2384
2385static int
2386zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2387{
2388	spa_t *spa;
2389	int error;
2390	nvlist_t *nvp = NULL;
2391
2392	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2393		/*
2394		 * If the pool is faulted, there may be properties we can still
2395		 * get (such as altroot and cachefile), so attempt to get them
2396		 * anyway.
2397		 */
2398		mutex_enter(&spa_namespace_lock);
2399		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2400			error = spa_prop_get(spa, &nvp);
2401		mutex_exit(&spa_namespace_lock);
2402	} else {
2403		error = spa_prop_get(spa, &nvp);
2404		spa_close(spa, FTAG);
2405	}
2406
2407	if (error == 0 && zc->zc_nvlist_dst != NULL)
2408		error = put_nvlist(zc, nvp);
2409	else
2410		error = EFAULT;
2411
2412	nvlist_free(nvp);
2413	return (error);
2414}
2415
2416/*
2417 * inputs:
2418 * zc_name		name of filesystem
2419 * zc_nvlist_src{_size}	nvlist of delegated permissions
2420 * zc_perm_action	allow/unallow flag
2421 *
2422 * outputs:		none
2423 */
2424static int
2425zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2426{
2427	int error;
2428	nvlist_t *fsaclnv = NULL;
2429
2430	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2431	    zc->zc_iflags, &fsaclnv)) != 0)
2432		return (error);
2433
2434	/*
2435	 * Verify nvlist is constructed correctly
2436	 */
2437	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2438		nvlist_free(fsaclnv);
2439		return (EINVAL);
2440	}
2441
2442	/*
2443	 * If we don't have PRIV_SYS_MOUNT, then validate
2444	 * that user is allowed to hand out each permission in
2445	 * the nvlist(s)
2446	 */
2447
2448	error = secpolicy_zfs(CRED());
2449	if (error) {
2450		if (zc->zc_perm_action == B_FALSE) {
2451			error = dsl_deleg_can_allow(zc->zc_name,
2452			    fsaclnv, CRED());
2453		} else {
2454			error = dsl_deleg_can_unallow(zc->zc_name,
2455			    fsaclnv, CRED());
2456		}
2457	}
2458
2459	if (error == 0)
2460		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2461
2462	nvlist_free(fsaclnv);
2463	return (error);
2464}
2465
2466/*
2467 * inputs:
2468 * zc_name		name of filesystem
2469 *
2470 * outputs:
2471 * zc_nvlist_src{_size}	nvlist of delegated permissions
2472 */
2473static int
2474zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2475{
2476	nvlist_t *nvp;
2477	int error;
2478
2479	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2480		error = put_nvlist(zc, nvp);
2481		nvlist_free(nvp);
2482	}
2483
2484	return (error);
2485}
2486
2487/*
2488 * Search the vfs list for a specified resource.  Returns a pointer to it
2489 * or NULL if no suitable entry is found. The caller of this routine
2490 * is responsible for releasing the returned vfs pointer.
2491 */
2492static vfs_t *
2493zfs_get_vfs(const char *resource)
2494{
2495	struct vfs *vfsp;
2496	struct vfs *vfs_found = NULL;
2497
2498	vfs_list_read_lock();
2499	vfsp = rootvfs;
2500	do {
2501		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2502			VFS_HOLD(vfsp);
2503			vfs_found = vfsp;
2504			break;
2505		}
2506		vfsp = vfsp->vfs_next;
2507	} while (vfsp != rootvfs);
2508	vfs_list_unlock();
2509	return (vfs_found);
2510}
2511
2512/* ARGSUSED */
2513static void
2514zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2515{
2516	zfs_creat_t *zct = arg;
2517
2518	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2519}
2520
/*
 * Sentinel (all bits set) used as the initial value of a zpl property
 * variable to mean "no value has been supplied yet".
 */
#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2522
2523/*
2524 * inputs:
2525 * createprops		list of properties requested by creator
2526 * default_zplver	zpl version to use if unspecified in createprops
2527 * fuids_ok		fuids allowed in this version of the spa?
2528 * os			parent objset pointer (NULL if root fs)
2529 *
2530 * outputs:
2531 * zplprops	values for the zplprops we attach to the master node object
2532 * is_ci	true if requested file system will be purely case-insensitive
2533 *
2534 * Determine the settings for utf8only, normalization and
2535 * casesensitivity.  Specific values may have been requested by the
2536 * creator and/or we can inherit values from the parent dataset.  If
2537 * the file system is of too early a vintage, a creator can not
2538 * request settings for these properties, even if the requested
2539 * setting is the default value.  We don't actually want to create dsl
2540 * properties for these, so remove them from the source nvlist after
2541 * processing.
2542 */
2543static int
2544zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2545    boolean_t fuids_ok, nvlist_t *createprops, nvlist_t *zplprops,
2546    boolean_t *is_ci)
2547{
2548	uint64_t sense = ZFS_PROP_UNDEFINED;
2549	uint64_t norm = ZFS_PROP_UNDEFINED;
2550	uint64_t u8 = ZFS_PROP_UNDEFINED;
2551
2552	ASSERT(zplprops != NULL);
2553
2554	/*
2555	 * Pull out creator prop choices, if any.
2556	 */
2557	if (createprops) {
2558		(void) nvlist_lookup_uint64(createprops,
2559		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2560		(void) nvlist_lookup_uint64(createprops,
2561		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2562		(void) nvlist_remove_all(createprops,
2563		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2564		(void) nvlist_lookup_uint64(createprops,
2565		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2566		(void) nvlist_remove_all(createprops,
2567		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2568		(void) nvlist_lookup_uint64(createprops,
2569		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2570		(void) nvlist_remove_all(createprops,
2571		    zfs_prop_to_name(ZFS_PROP_CASE));
2572	}
2573
2574	/*
2575	 * If the zpl version requested is whacky or the file system
2576	 * or pool is version is too "young" to support normalization
2577	 * and the creator tried to set a value for one of the props,
2578	 * error out.
2579	 */
2580	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2581	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2582	    (zplver < ZPL_VERSION_NORMALIZATION &&
2583	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2584	    sense != ZFS_PROP_UNDEFINED)))
2585		return (ENOTSUP);
2586
2587	/*
2588	 * Put the version in the zplprops
2589	 */
2590	VERIFY(nvlist_add_uint64(zplprops,
2591	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2592
2593	if (norm == ZFS_PROP_UNDEFINED)
2594		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2595	VERIFY(nvlist_add_uint64(zplprops,
2596	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2597
2598	/*
2599	 * If we're normalizing, names must always be valid UTF-8 strings.
2600	 */
2601	if (norm)
2602		u8 = 1;
2603	if (u8 == ZFS_PROP_UNDEFINED)
2604		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2605	VERIFY(nvlist_add_uint64(zplprops,
2606	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2607
2608	if (sense == ZFS_PROP_UNDEFINED)
2609		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2610	VERIFY(nvlist_add_uint64(zplprops,
2611	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2612
2613	if (is_ci)
2614		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2615
2616	return (0);
2617}
2618
2619static int
2620zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2621    nvlist_t *zplprops, boolean_t *is_ci)
2622{
2623	boolean_t fuids_ok = B_TRUE;
2624	uint64_t zplver = ZPL_VERSION;
2625	objset_t *os = NULL;
2626	char parentname[MAXNAMELEN];
2627	char *cp;
2628	int error;
2629
2630	(void) strlcpy(parentname, dataset, sizeof (parentname));
2631	cp = strrchr(parentname, '/');
2632	ASSERT(cp != NULL);
2633	cp[0] = '\0';
2634
2635	if (zfs_earlier_version(dataset, SPA_VERSION_USERSPACE))
2636		zplver = ZPL_VERSION_USERSPACE - 1;
2637	if (zfs_earlier_version(dataset, SPA_VERSION_FUID)) {
2638		zplver = ZPL_VERSION_FUID - 1;
2639		fuids_ok = B_FALSE;
2640	}
2641
2642	/*
2643	 * Open parent object set so we can inherit zplprop values.
2644	 */
2645	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2646		return (error);
2647
2648	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops,
2649	    zplprops, is_ci);
2650	dmu_objset_rele(os, FTAG);
2651	return (error);
2652}
2653
2654static int
2655zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2656    nvlist_t *zplprops, boolean_t *is_ci)
2657{
2658	boolean_t fuids_ok = B_TRUE;
2659	uint64_t zplver = ZPL_VERSION;
2660	int error;
2661
2662	if (spa_vers < SPA_VERSION_FUID) {
2663		zplver = ZPL_VERSION_FUID - 1;
2664		fuids_ok = B_FALSE;
2665	}
2666
2667	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, createprops,
2668	    zplprops, is_ci);
2669	return (error);
2670}
2671
2672/*
2673 * inputs:
2674 * zc_objset_type	type of objset to create (fs vs zvol)
2675 * zc_name		name of new objset
2676 * zc_value		name of snapshot to clone from (may be empty)
2677 * zc_nvlist_src{_size}	nvlist of properties to apply
2678 *
2679 * outputs: none
2680 */
2681static int
2682zfs_ioc_create(zfs_cmd_t *zc)
2683{
2684	objset_t *clone;
2685	int error = 0;
2686	zfs_creat_t zct;
2687	nvlist_t *nvprops = NULL;
2688	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2689	dmu_objset_type_t type = zc->zc_objset_type;
2690
2691	switch (type) {
2692
2693	case DMU_OST_ZFS:
2694		cbfunc = zfs_create_cb;
2695		break;
2696
2697	case DMU_OST_ZVOL:
2698		cbfunc = zvol_create_cb;
2699		break;
2700
2701	default:
2702		cbfunc = NULL;
2703		break;
2704	}
2705	if (strchr(zc->zc_name, '@') ||
2706	    strchr(zc->zc_name, '%'))
2707		return (EINVAL);
2708
2709	if (zc->zc_nvlist_src != NULL &&
2710	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2711	    zc->zc_iflags, &nvprops)) != 0)
2712		return (error);
2713
2714	zct.zct_zplprops = NULL;
2715	zct.zct_props = nvprops;
2716
2717	if (zc->zc_value[0] != '\0') {
2718		/*
2719		 * We're creating a clone of an existing snapshot.
2720		 */
2721		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2722		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2723			nvlist_free(nvprops);
2724			return (EINVAL);
2725		}
2726
2727		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2728		if (error) {
2729			nvlist_free(nvprops);
2730			return (error);
2731		}
2732
2733		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2734		dmu_objset_rele(clone, FTAG);
2735		if (error) {
2736			nvlist_free(nvprops);
2737			return (error);
2738		}
2739	} else {
2740		boolean_t is_insensitive = B_FALSE;
2741
2742		if (cbfunc == NULL) {
2743			nvlist_free(nvprops);
2744			return (EINVAL);
2745		}
2746
2747		if (type == DMU_OST_ZVOL) {
2748			uint64_t volsize, volblocksize;
2749
2750			if (nvprops == NULL ||
2751			    nvlist_lookup_uint64(nvprops,
2752			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2753			    &volsize) != 0) {
2754				nvlist_free(nvprops);
2755				return (EINVAL);
2756			}
2757
2758			if ((error = nvlist_lookup_uint64(nvprops,
2759			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2760			    &volblocksize)) != 0 && error != ENOENT) {
2761				nvlist_free(nvprops);
2762				return (EINVAL);
2763			}
2764
2765			if (error != 0)
2766				volblocksize = zfs_prop_default_numeric(
2767				    ZFS_PROP_VOLBLOCKSIZE);
2768
2769			if ((error = zvol_check_volblocksize(
2770			    volblocksize)) != 0 ||
2771			    (error = zvol_check_volsize(volsize,
2772			    volblocksize)) != 0) {
2773				nvlist_free(nvprops);
2774				return (error);
2775			}
2776		} else if (type == DMU_OST_ZFS) {
2777			int error;
2778
2779			/*
2780			 * We have to have normalization and
2781			 * case-folding flags correct when we do the
2782			 * file system creation, so go figure them out
2783			 * now.
2784			 */
2785			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2786			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2787			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2788			    zct.zct_zplprops, &is_insensitive);
2789			if (error != 0) {
2790				nvlist_free(nvprops);
2791				nvlist_free(zct.zct_zplprops);
2792				return (error);
2793			}
2794		}
2795		error = dmu_objset_create(zc->zc_name, type,
2796		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2797		nvlist_free(zct.zct_zplprops);
2798	}
2799
2800	/*
2801	 * It would be nice to do this atomically.
2802	 */
2803	if (error == 0) {
2804		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
2805		    nvprops, NULL);
2806		if (error != 0)
2807			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
2808	}
2809	nvlist_free(nvprops);
2810	return (error);
2811}
2812
2813/*
2814 * inputs:
2815 * zc_name	name of filesystem
2816 * zc_value	short name of snapshot
2817 * zc_cookie	recursive flag
2818 * zc_nvlist_src[_size] property list
2819 *
2820 * outputs:
2821 * zc_value	short snapname (i.e. part after the '@')
2822 */
2823static int
2824zfs_ioc_snapshot(zfs_cmd_t *zc)
2825{
2826	nvlist_t *nvprops = NULL;
2827	int error;
2828	boolean_t recursive = zc->zc_cookie;
2829
2830	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2831		return (EINVAL);
2832
2833	if (zc->zc_nvlist_src != NULL &&
2834	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2835	    zc->zc_iflags, &nvprops)) != 0)
2836		return (error);
2837
2838	error = zfs_check_userprops(zc->zc_name, nvprops);
2839	if (error)
2840		goto out;
2841
2842	if (!nvlist_empty(nvprops) &&
2843	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
2844		error = ENOTSUP;
2845		goto out;
2846	}
2847
2848	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value,
2849	    nvprops, recursive);
2850
2851out:
2852	nvlist_free(nvprops);
2853	return (error);
2854}
2855
2856int
2857zfs_unmount_snap(const char *name, void *arg)
2858{
2859	vfs_t *vfsp = NULL;
2860
2861	if (arg) {
2862		char *snapname = arg;
2863		char *fullname = kmem_asprintf("%s@%s", name, snapname);
2864		vfsp = zfs_get_vfs(fullname);
2865		strfree(fullname);
2866	} else if (strchr(name, '@')) {
2867		vfsp = zfs_get_vfs(name);
2868	}
2869
2870	if (vfsp) {
2871		/*
2872		 * Always force the unmount for snapshots.
2873		 */
2874		int flag = MS_FORCE;
2875		int err;
2876
2877		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2878			VFS_RELE(vfsp);
2879			return (err);
2880		}
2881		VFS_RELE(vfsp);
2882		if ((err = dounmount(vfsp, flag, kcred)) != 0)
2883			return (err);
2884	}
2885	return (0);
2886}
2887
2888/*
2889 * inputs:
2890 * zc_name		name of filesystem
2891 * zc_value		short name of snapshot
2892 * zc_defer_destroy	mark for deferred destroy
2893 *
2894 * outputs:	none
2895 */
2896static int
2897zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2898{
2899	int err;
2900
2901	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2902		return (EINVAL);
2903	err = dmu_objset_find(zc->zc_name,
2904	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2905	if (err)
2906		return (err);
2907	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
2908	    zc->zc_defer_destroy));
2909}
2910
2911/*
2912 * inputs:
2913 * zc_name		name of dataset to destroy
2914 * zc_objset_type	type of objset
2915 * zc_defer_destroy	mark for deferred destroy
2916 *
2917 * outputs:		none
2918 */
2919static int
2920zfs_ioc_destroy(zfs_cmd_t *zc)
2921{
2922	int err;
2923	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2924		err = zfs_unmount_snap(zc->zc_name, NULL);
2925		if (err)
2926			return (err);
2927	}
2928
2929	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
2930	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
2931		(void) zvol_remove_minor(zc->zc_name);
2932	return (err);
2933}
2934
/*
 * Roll a dataset back to its most recent snapshot.
 *
 * inputs:
 * zc_name	name of dataset to rollback (to most recent snapshot)
 *
 * outputs:	none
 *
 * The rollback is implemented by creating a temporary clone named
 * "<zc_name>/%rollback" of the most recent snapshot, swapping its contents
 * with the dataset, and then destroying the clone.  If the dataset is
 * found by getzfsvfs(), the file system is suspended around the swap and
 * resumed afterwards.
 *
 * Returns EINVAL if zc_name is a snapshot or has no previous snapshot,
 * EBUSY if the dataset cannot be owned for the swap, or the error from the
 * failing step.
 */
static int
zfs_ioc_rollback(zfs_cmd_t *zc)
{
	dsl_dataset_t *ds, *clone;
	int error;
	zfsvfs_t *zfsvfs;
	char *clone_name;

	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
	if (error)
		return (error);

	/* must not be a snapshot */
	if (dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/* must have a most recent snapshot */
	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/*
	 * Create clone of most recent snapshot.
	 */
	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
	if (error)
		goto out;

	/*
	 * NOTE(review): if taking ownership fails here, the clone created
	 * just above is not destroyed on this path -- confirm whether it is
	 * cleaned up elsewhere.
	 */
	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
	if (error)
		goto out;

	/*
	 * Do clone swap.
	 */
	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
		/* A zfsvfs exists: suspend the file system around the swap. */
		error = zfs_suspend_fs(zfsvfs);
		if (error == 0) {
			int resume_err;

			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
				error = dsl_dataset_clone_swap(clone, ds,
				    B_TRUE);
				/* disown replaces the rele at 'out' */
				dsl_dataset_disown(ds, FTAG);
				ds = NULL;
			} else {
				error = EBUSY;
			}
			/* Always resume; a swap error takes precedence. */
			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
			error = error ? error : resume_err;
		}
		VFS_RELE(zfsvfs->z_vfs);
	} else {
		/* No zfsvfs was found: swap without suspending anything. */
		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
			dsl_dataset_disown(ds, FTAG);
			ds = NULL;
		} else {
			error = EBUSY;
		}
	}

	/*
	 * Destroy clone (which also closes it).
	 */
	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);

out:
	strfree(clone_name);
	/* ds is NULL here if it was already disowned above. */
	if (ds)
		dsl_dataset_rele(ds, FTAG);
	return (error);
}
3018
3019/*
3020 * inputs:
3021 * zc_name	old name of dataset
3022 * zc_value	new name of dataset
3023 * zc_cookie	recursive flag (only valid for snapshots)
3024 *
3025 * outputs:	none
3026 */
3027static int
3028zfs_ioc_rename(zfs_cmd_t *zc)
3029{
3030	boolean_t recursive = zc->zc_cookie & 1;
3031
3032	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3033	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3034	    strchr(zc->zc_value, '%'))
3035		return (EINVAL);
3036
3037	/*
3038	 * Unmount snapshot unless we're doing a recursive rename,
3039	 * in which case the dataset code figures out which snapshots
3040	 * to unmount.
3041	 */
3042	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3043	    zc->zc_objset_type == DMU_OST_ZFS) {
3044		int err = zfs_unmount_snap(zc->zc_name, NULL);
3045		if (err)
3046			return (err);
3047	}
3048	if (zc->zc_objset_type == DMU_OST_ZVOL)
3049		(void) zvol_remove_minor(zc->zc_name);
3050	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3051}
3052
3053static int
3054zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3055{
3056	const char *propname = nvpair_name(pair);
3057	boolean_t issnap = (strchr(dsname, '@') != NULL);
3058	zfs_prop_t prop = zfs_name_to_prop(propname);
3059	uint64_t intval;
3060	int err;
3061
3062	if (prop == ZPROP_INVAL) {
3063		if (zfs_prop_user(propname)) {
3064			if (err = zfs_secpolicy_write_perms(dsname,
3065			    ZFS_DELEG_PERM_USERPROP, cr))
3066				return (err);
3067			return (0);
3068		}
3069
3070		if (!issnap && zfs_prop_userquota(propname)) {
3071			const char *perm = NULL;
3072			const char *uq_prefix =
3073			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3074			const char *gq_prefix =
3075			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3076
3077			if (strncmp(propname, uq_prefix,
3078			    strlen(uq_prefix)) == 0) {
3079				perm = ZFS_DELEG_PERM_USERQUOTA;
3080			} else if (strncmp(propname, gq_prefix,
3081			    strlen(gq_prefix)) == 0) {
3082				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3083			} else {
3084				/* USERUSED and GROUPUSED are read-only */
3085				return (EINVAL);
3086			}
3087
3088			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3089				return (err);
3090			return (0);
3091		}
3092
3093		return (EINVAL);
3094	}
3095
3096	if (issnap)
3097		return (EINVAL);
3098
3099	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3100		/*
3101		 * dsl_prop_get_all_impl() returns properties in this
3102		 * format.
3103		 */
3104		nvlist_t *attrs;
3105		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3106		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3107		    &pair) == 0);
3108	}
3109
3110	/*
3111	 * Check that this value is valid for this pool version
3112	 */
3113	switch (prop) {
3114	case ZFS_PROP_COMPRESSION:
3115		/*
3116		 * If the user specified gzip compression, make sure
3117		 * the SPA supports it. We ignore any errors here since
3118		 * we'll catch them later.
3119		 */
3120		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3121		    nvpair_value_uint64(pair, &intval) == 0) {
3122			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3123			    intval <= ZIO_COMPRESS_GZIP_9 &&
3124			    zfs_earlier_version(dsname,
3125			    SPA_VERSION_GZIP_COMPRESSION)) {
3126				return (ENOTSUP);
3127			}
3128
3129			if (intval == ZIO_COMPRESS_ZLE &&
3130			    zfs_earlier_version(dsname,
3131			    SPA_VERSION_ZLE_COMPRESSION))
3132				return (ENOTSUP);
3133
3134			/*
3135			 * If this is a bootable dataset then
3136			 * verify that the compression algorithm
3137			 * is supported for booting. We must return
3138			 * something other than ENOTSUP since it
3139			 * implies a downrev pool version.
3140			 */
3141			if (zfs_is_bootfs(dsname) &&
3142			    !BOOTFS_COMPRESS_VALID(intval)) {
3143				return (ERANGE);
3144			}
3145		}
3146		break;
3147
3148	case ZFS_PROP_COPIES:
3149		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3150			return (ENOTSUP);
3151		break;
3152
3153	case ZFS_PROP_DEDUP:
3154		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3155			return (ENOTSUP);
3156		break;
3157
3158	case ZFS_PROP_SHARESMB:
3159		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3160			return (ENOTSUP);
3161		break;
3162
3163	case ZFS_PROP_ACLINHERIT:
3164		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3165		    nvpair_value_uint64(pair, &intval) == 0) {
3166			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3167			    zfs_earlier_version(dsname,
3168			    SPA_VERSION_PASSTHROUGH_X))
3169				return (ENOTSUP);
3170		}
3171		break;
3172	}
3173
3174	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3175}
3176
3177/*
3178 * Removes properties from the given props list that fail permission checks
3179 * needed to clear them and to restore them in case of a receive error. For each
3180 * property, make sure we have both set and inherit permissions.
3181 *
3182 * Returns the first error encountered if any permission checks fail. If the
3183 * caller provides a non-NULL errlist, it also gives the complete list of names
3184 * of all the properties that failed a permission check along with the
3185 * corresponding error numbers. The caller is responsible for freeing the
3186 * returned errlist.
3187 *
3188 * If every property checks out successfully, zero is returned and the list
3189 * pointed at by errlist is NULL.
3190 */
3191static int
3192zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3193{
3194	zfs_cmd_t *zc;
3195	nvpair_t *pair, *next_pair;
3196	nvlist_t *errors;
3197	int err, rv = 0;
3198
3199	if (props == NULL)
3200		return (0);
3201
3202	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3203
3204	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3205	(void) strcpy(zc->zc_name, dataset);
3206	pair = nvlist_next_nvpair(props, NULL);
3207	while (pair != NULL) {
3208		next_pair = nvlist_next_nvpair(props, pair);
3209
3210		(void) strcpy(zc->zc_value, nvpair_name(pair));
3211		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3212		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3213			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3214			VERIFY(nvlist_add_int32(errors,
3215			    zc->zc_value, err) == 0);
3216		}
3217		pair = next_pair;
3218	}
3219	kmem_free(zc, sizeof (zfs_cmd_t));
3220
3221	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3222		nvlist_free(errors);
3223		errors = NULL;
3224	} else {
3225		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3226	}
3227
3228	if (errlist == NULL)
3229		nvlist_free(errors);
3230	else
3231		*errlist = errors;
3232
3233	return (rv);
3234}
3235
3236static boolean_t
3237propval_equals(nvpair_t *p1, nvpair_t *p2)
3238{
3239	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3240		/* dsl_prop_get_all_impl() format */
3241		nvlist_t *attrs;
3242		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3243		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3244		    &p1) == 0);
3245	}
3246
3247	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3248		nvlist_t *attrs;
3249		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3250		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3251		    &p2) == 0);
3252	}
3253
3254	if (nvpair_type(p1) != nvpair_type(p2))
3255		return (B_FALSE);
3256
3257	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3258		char *valstr1, *valstr2;
3259
3260		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3261		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3262		return (strcmp(valstr1, valstr2) == 0);
3263	} else {
3264		uint64_t intval1, intval2;
3265
3266		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3267		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3268		return (intval1 == intval2);
3269	}
3270}
3271
3272/*
3273 * Remove properties from props if they are not going to change (as determined
3274 * by comparison with origprops). Remove them from origprops as well, since we
3275 * do not need to clear or restore properties that won't change.
3276 */
3277static void
3278props_reduce(nvlist_t *props, nvlist_t *origprops)
3279{
3280	nvpair_t *pair, *next_pair;
3281
3282	if (origprops == NULL)
3283		return; /* all props need to be received */
3284
3285	pair = nvlist_next_nvpair(props, NULL);
3286	while (pair != NULL) {
3287		const char *propname = nvpair_name(pair);
3288		nvpair_t *match;
3289
3290		next_pair = nvlist_next_nvpair(props, pair);
3291
3292		if ((nvlist_lookup_nvpair(origprops, propname,
3293		    &match) != 0) || !propval_equals(pair, match))
3294			goto next; /* need to set received value */
3295
3296		/* don't clear the existing received value */
3297		(void) nvlist_remove_nvpair(origprops, match);
3298		/* don't bother receiving the property */
3299		(void) nvlist_remove_nvpair(props, pair);
3300next:
3301		pair = next_pair;
3302	}
3303}
3304
#ifdef	DEBUG
/*
 * DEBUG-only switch: when set, the next zfs_ioc_recv() call resets it to
 * B_FALSE and forces its receive result to an error.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
3308
/*
 * Receive a send stream into a dataset, applying received properties.
 *
 * inputs:
 * zc_name		name of containing filesystem
 * zc_nvlist_src{_size}	nvlist of properties to apply
 * zc_value		name of snapshot to create
 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie		file descriptor to recv from
 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
 * zc_guid		force flag
 * zc_top_ds		passed through to dmu_recv_begin()
 *
 * outputs:
 * zc_cookie		number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj		zprop_errflags_t
 *
 * Outline:
 *  1. Split zc_value at '@' into the target file system and snapshot name.
 *  2. If properties were sent and the target already exists, stash its
 *     current received properties (origprops) and drop those that will not
 *     change or that the caller may not clear.
 *  3. Begin the receive, clear the old received properties, set the new
 *     ones, and copy the per-property error list out to the caller.
 *  4. Receive the stream and finish it, suspending a mounted file system
 *     around dmu_recv_end().
 *  5. If the receive failed, try to restore the original properties.
 *
 * Returns EINVAL for a bad zc_value, EBADF for a bad file descriptor, the
 * error of the failing receive step, or -- when the receive itself
 * succeeded -- EINVAL if the error list could not be returned.
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
	file_t *fp;
	objset_t *os;
	dmu_recv_cookie_t drc;
	boolean_t force = (boolean_t)zc->zc_guid;
	int fd;
	int error = 0;
	int props_error = 0;
	nvlist_t *errors;
	offset_t off;
	nvlist_t *props = NULL; /* sent properties */
	nvlist_t *origprops = NULL; /* existing properties */
	objset_t *origin = NULL;
	char *tosnap;
	char tofs[ZFS_MAXNAMELEN];
	boolean_t first_recvd_props = B_FALSE;

	/* zc_value must be a valid "fs@snap" name without '%'. */
	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_value, '@') == NULL ||
	    strchr(zc->zc_value, '%'))
		return (EINVAL);

	/* Split into file system name (tofs) and snapshot name (tosnap). */
	(void) strcpy(tofs, zc->zc_value);
	tosnap = strchr(tofs, '@');
	*tosnap++ = '\0';

	if (zc->zc_nvlist_src != NULL &&
	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
		return (error);

	fd = zc->zc_cookie;
	fp = getf(fd);
	if (fp == NULL) {
		nvlist_free(props);
		return (EBADF);
	}

	/* Collects one entry per property that could not be applied. */
	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/* Only reached with an existing target: a failed hold is skipped. */
	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
		    !dsl_prop_get_hasrecvd(os)) {
			first_recvd_props = B_TRUE;
		}

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(os, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			if (zfs_check_clearable(tofs, origprops,
			    &errlist) != 0)
				(void) nvlist_merge(errors, errlist, 0);
			nvlist_free(errlist);
		}

		dmu_objset_rele(os, FTAG);
	}

	/* A clone origin was named: hold it across dmu_recv_begin(). */
	if (zc->zc_string[0]) {
		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
		if (error)
			goto out;
	}

	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
	    &zc->zc_begin_record, force, origin, &drc);
	if (origin)
		dmu_objset_rele(origin, FTAG);
	if (error)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props) {
		nvlist_t *errlist;

		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
			if (drc.drc_newfs) {
				if (spa_version(os->os_spa) >=
				    SPA_VERSION_RECVD_PROPS)
					first_recvd_props = B_TRUE;
			} else if (origprops != NULL) {
				if (clear_received_props(os, tofs, origprops,
				    first_recvd_props ? NULL : props) != 0)
					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			} else {
				/* existing fs, but nothing was stashed */
				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			}
			dsl_prop_set_hasrecvd(os);
		} else if (!drc.drc_newfs) {
			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		}

		/*
		 * NOTE(review): errlist is not initialized here, so this
		 * relies on zfs_set_prop_nvlist() always storing to it --
		 * confirm.
		 */
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
		    props, &errlist);
		(void) nvlist_merge(errors, errlist, 0);
		nvlist_free(errlist);
	}

	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
		/*
		 * Caller made zc->zc_nvlist_dst less than the minimum expected
		 * size or supplied an invalid address.
		 */
		props_error = EINVAL;
	}

	/* Receive starting at the file's current offset. */
	off = fp->f_offset;
	error = dmu_recv_stream(&drc, fp->f_vnode, &off);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			int end_err;

			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc);
			if (error == 0)
				error = zfs_resume_fs(zfsvfs, tofs);
			error = error ? error : end_err;
			VFS_RELE(zfsvfs->z_vfs);
		} else {
			error = dmu_recv_end(&drc);
		}
	}

	/* Report the bytes consumed and advance the file offset. */
	zc->zc_cookie = off - fp->f_offset;
	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
		fp->f_offset = off;

#ifdef	DEBUG
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif
	/*
	 * On error, restore the original props.
	 */
	if (error && props) {
		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
			if (clear_received_props(os, tofs, props, NULL) != 0) {
				/*
				 * We failed to clear the received properties.
				 * Since we may have left a $recvd value on the
				 * system, we can't clear the $hasrecvd flag.
				 */
				zc->zc_obj |= ZPROP_ERR_NORESTORE;
			} else if (first_recvd_props) {
				dsl_prop_unset_hasrecvd(os);
			}
			dmu_objset_rele(os, FTAG);
		} else if (!drc.drc_newfs) {
			/* We failed to clear the received properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	nvlist_free(props);
	nvlist_free(origprops);
	nvlist_free(errors);
	releasef(fd);

	/* A receive error wins over a failure to return the error list. */
	if (error == 0)
		error = props_error;

	return (error);
}
3539
3540/*
3541 * inputs:
3542 * zc_name	name of snapshot to send
3543 * zc_value	short name of incremental fromsnap (may be empty)
3544 * zc_cookie	file descriptor to send stream to
3545 * zc_obj	fromorigin flag (mutually exclusive with zc_value)
3546 *
3547 * outputs: none
3548 */
3549static int
3550zfs_ioc_send(zfs_cmd_t *zc)
3551{
3552	objset_t *fromsnap = NULL;
3553	objset_t *tosnap;
3554	file_t *fp;
3555	int error;
3556	offset_t off;
3557
3558	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
3559	if (error)
3560		return (error);
3561
3562	if (zc->zc_value[0] != '\0') {
3563		char *buf;
3564		char *cp;
3565
3566		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3567		(void) strncpy(buf, zc->zc_name, MAXPATHLEN);
3568		cp = strchr(buf, '@');
3569		if (cp)
3570			*(cp+1) = 0;
3571		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
3572		error = dmu_objset_hold(buf, FTAG, &fromsnap);
3573		kmem_free(buf, MAXPATHLEN);
3574		if (error) {
3575			dmu_objset_rele(tosnap, FTAG);
3576			return (error);
3577		}
3578	}
3579
3580	fp = getf(zc->zc_cookie);
3581	if (fp == NULL) {
3582		dmu_objset_rele(tosnap, FTAG);
3583		if (fromsnap)
3584			dmu_objset_rele(fromsnap, FTAG);
3585		return (EBADF);
3586	}
3587
3588	off = fp->f_offset;
3589	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
3590
3591	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3592		fp->f_offset = off;
3593	releasef(zc->zc_cookie);
3594	if (fromsnap)
3595		dmu_objset_rele(fromsnap, FTAG);
3596	dmu_objset_rele(tosnap, FTAG);
3597	return (error);
3598}
3599
3600static int
3601zfs_ioc_inject_fault(zfs_cmd_t *zc)
3602{
3603	int id, error;
3604
3605	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3606	    &zc->zc_inject_record);
3607
3608	if (error == 0)
3609		zc->zc_guid = (uint64_t)id;
3610
3611	return (error);
3612}
3613
3614static int
3615zfs_ioc_clear_fault(zfs_cmd_t *zc)
3616{
3617	return (zio_clear_fault((int)zc->zc_guid));
3618}
3619
3620static int
3621zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3622{
3623	int id = (int)zc->zc_guid;
3624	int error;
3625
3626	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3627	    &zc->zc_inject_record);
3628
3629	zc->zc_guid = id;
3630
3631	return (error);
3632}
3633
3634static int
3635zfs_ioc_error_log(zfs_cmd_t *zc)
3636{
3637	spa_t *spa;
3638	int error;
3639	size_t count = (size_t)zc->zc_nvlist_dst_size;
3640
3641	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3642		return (error);
3643
3644	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
3645	    &count);
3646	if (error == 0)
3647		zc->zc_nvlist_dst_size = count;
3648	else
3649		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
3650
3651	spa_close(spa, FTAG);
3652
3653	return (error);
3654}
3655
3656static int
3657zfs_ioc_clear(zfs_cmd_t *zc)
3658{
3659	spa_t *spa;
3660	vdev_t *vd;
3661	int error;
3662
3663	/*
3664	 * On zpool clear we also fix up missing slogs
3665	 */
3666	mutex_enter(&spa_namespace_lock);
3667	spa = spa_lookup(zc->zc_name);
3668	if (spa == NULL) {
3669		mutex_exit(&spa_namespace_lock);
3670		return (EIO);
3671	}
3672	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
3673		/* we need to let spa_open/spa_load clear the chains */
3674		spa_set_log_state(spa, SPA_LOG_CLEAR);
3675	}
3676	spa->spa_last_open_failed = 0;
3677	mutex_exit(&spa_namespace_lock);
3678
3679	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
3680		error = spa_open(zc->zc_name, &spa, FTAG);
3681	} else {
3682		nvlist_t *policy;
3683		nvlist_t *config = NULL;
3684
3685		if (zc->zc_nvlist_src == NULL)
3686			return (EINVAL);
3687
3688		if ((error = get_nvlist(zc->zc_nvlist_src,
3689		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
3690			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
3691			    policy, &config);
3692			if (config != NULL) {
3693				(void) put_nvlist(zc, config);
3694				nvlist_free(config);
3695			}
3696			nvlist_free(policy);
3697		}
3698	}
3699
3700	if (error)
3701		return (error);
3702
3703	spa_vdev_state_enter(spa, SCL_NONE);
3704
3705	if (zc->zc_guid == 0) {
3706		vd = NULL;
3707	} else {
3708		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
3709		if (vd == NULL) {
3710			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
3711			spa_close(spa, FTAG);
3712			return (ENODEV);
3713		}
3714	}
3715
3716	vdev_clear(spa, vd);
3717
3718	(void) spa_vdev_state_exit(spa, NULL, 0);
3719
3720	/*
3721	 * Resume any suspended I/Os.
3722	 */
3723	if (zio_resume(spa) != 0)
3724		error = EIO;
3725
3726	spa_close(spa, FTAG);
3727
3728	return (error);
3729}
3730
3731/*
3732 * inputs:
3733 * zc_name	name of filesystem
3734 * zc_value	name of origin snapshot
3735 *
3736 * outputs:
3737 * zc_string	name of conflicting snapshot, if there is one
3738 */
3739static int
3740zfs_ioc_promote(zfs_cmd_t *zc)
3741{
3742	char *cp;
3743
3744	/*
3745	 * We don't need to unmount *all* the origin fs's snapshots, but
3746	 * it's easier.
3747	 */
3748	cp = strchr(zc->zc_value, '@');
3749	if (cp)
3750		*cp = '\0';
3751	(void) dmu_objset_find(zc->zc_value,
3752	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
3753	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
3754}
3755
3756/*
3757 * Retrieve a single {user|group}{used|quota}@... property.
3758 *
3759 * inputs:
3760 * zc_name	name of filesystem
3761 * zc_objset_type zfs_userquota_prop_t
3762 * zc_value	domain name (eg. "S-1-234-567-89")
3763 * zc_guid	RID/UID/GID
3764 *
3765 * outputs:
3766 * zc_cookie	property value
3767 */
3768static int
3769zfs_ioc_userspace_one(zfs_cmd_t *zc)
3770{
3771	zfsvfs_t *zfsvfs;
3772	int error;
3773
3774	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
3775		return (EINVAL);
3776
3777	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3778	if (error)
3779		return (error);
3780
3781	error = zfs_userspace_one(zfsvfs,
3782	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
3783	zfsvfs_rele(zfsvfs, FTAG);
3784
3785	return (error);
3786}
3787
3788/*
3789 * inputs:
3790 * zc_name		name of filesystem
3791 * zc_cookie		zap cursor
3792 * zc_objset_type	zfs_userquota_prop_t
3793 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
3794 *
3795 * outputs:
3796 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
3797 * zc_cookie	zap cursor
3798 */
3799static int
3800zfs_ioc_userspace_many(zfs_cmd_t *zc)
3801{
3802	zfsvfs_t *zfsvfs;
3803	int error;
3804
3805	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3806	if (error)
3807		return (error);
3808
3809	int bufsize = zc->zc_nvlist_dst_size;
3810	void *buf = kmem_alloc(bufsize, KM_SLEEP);
3811
3812	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
3813	    buf, &zc->zc_nvlist_dst_size);
3814
3815	if (error == 0) {
3816		error = xcopyout(buf,
3817		    (void *)(uintptr_t)zc->zc_nvlist_dst,
3818		    zc->zc_nvlist_dst_size);
3819	}
3820	kmem_free(buf, bufsize);
3821	zfsvfs_rele(zfsvfs, FTAG);
3822
3823	return (error);
3824}
3825
3826/*
3827 * inputs:
3828 * zc_name		name of filesystem
3829 *
3830 * outputs:
3831 * none
3832 */
3833static int
3834zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
3835{
3836	objset_t *os;
3837	int error = 0;
3838	zfsvfs_t *zfsvfs;
3839
3840	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3841		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
3842			/*
3843			 * If userused is not enabled, it may be because the
3844			 * objset needs to be closed & reopened (to grow the
3845			 * objset_phys_t).  Suspend/resume the fs will do that.
3846			 */
3847			error = zfs_suspend_fs(zfsvfs);
3848			if (error == 0)
3849				error = zfs_resume_fs(zfsvfs, zc->zc_name);
3850		}
3851		if (error == 0)
3852			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
3853		VFS_RELE(zfsvfs->z_vfs);
3854	} else {
3855		/* XXX kind of reading contents without owning */
3856		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
3857		if (error)
3858			return (error);
3859
3860		error = dmu_objset_userspace_upgrade(os);
3861		dmu_objset_rele(os, FTAG);
3862	}
3863
3864	return (error);
3865}
3866
/*
 * We don't want to have a hard dependency
 * on some special symbols in sharefs,
 * nfs, and smbsrv.  Look them up when
 * the first file system is shared.
 * None of the sharefs, nfs, or smbsrv modules can be unloaded.
 */
/* "nfs_export", looked up in nfs_mod by zfs_ioc_share(). */
int (*znfsexport_fs)(void *arg);
/* "sharefs_impl", looked up in sharefs_mod by zfs_init_sharefs(). */
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
/* "smb_server_share", looked up in smbsrv_mod by zfs_ioc_share(). */
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 by zfs_ioc_share() once the matching symbols are resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Handles returned by ddi_modopen() for fs/nfs, fs/sharefs, drv/smbsrv. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and function pointers above are being set up. */
kmutex_t zfs_share_lock;
3885
3886static int
3887zfs_init_sharefs()
3888{
3889	int error;
3890
3891	ASSERT(MUTEX_HELD(&zfs_share_lock));
3892	/* Both NFS and SMB shares also require sharetab support. */
3893	if (sharefs_mod == NULL && ((sharefs_mod =
3894	    ddi_modopen("fs/sharefs",
3895	    KRTLD_MODE_FIRST, &error)) == NULL)) {
3896		return (ENOSYS);
3897	}
3898	if (zshare_fs == NULL && ((zshare_fs =
3899	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
3900	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
3901		return (ENOSYS);
3902	}
3903	return (0);
3904}
3905
3906static int
3907zfs_ioc_share(zfs_cmd_t *zc)
3908{
3909	int error;
3910	int opcode;
3911
3912	switch (zc->zc_share.z_sharetype) {
3913	case ZFS_SHARE_NFS:
3914	case ZFS_UNSHARE_NFS:
3915		if (zfs_nfsshare_inited == 0) {
3916			mutex_enter(&zfs_share_lock);
3917			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
3918			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3919				mutex_exit(&zfs_share_lock);
3920				return (ENOSYS);
3921			}
3922			if (znfsexport_fs == NULL &&
3923			    ((znfsexport_fs = (int (*)(void *))
3924			    ddi_modsym(nfs_mod,
3925			    "nfs_export", &error)) == NULL)) {
3926				mutex_exit(&zfs_share_lock);
3927				return (ENOSYS);
3928			}
3929			error = zfs_init_sharefs();
3930			if (error) {
3931				mutex_exit(&zfs_share_lock);
3932				return (ENOSYS);
3933			}
3934			zfs_nfsshare_inited = 1;
3935			mutex_exit(&zfs_share_lock);
3936		}
3937		break;
3938	case ZFS_SHARE_SMB:
3939	case ZFS_UNSHARE_SMB:
3940		if (zfs_smbshare_inited == 0) {
3941			mutex_enter(&zfs_share_lock);
3942			if (smbsrv_mod == NULL && ((smbsrv_mod =
3943			    ddi_modopen("drv/smbsrv",
3944			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3945				mutex_exit(&zfs_share_lock);
3946				return (ENOSYS);
3947			}
3948			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
3949			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
3950			    "smb_server_share", &error)) == NULL)) {
3951				mutex_exit(&zfs_share_lock);
3952				return (ENOSYS);
3953			}
3954			error = zfs_init_sharefs();
3955			if (error) {
3956				mutex_exit(&zfs_share_lock);
3957				return (ENOSYS);
3958			}
3959			zfs_smbshare_inited = 1;
3960			mutex_exit(&zfs_share_lock);
3961		}
3962		break;
3963	default:
3964		return (EINVAL);
3965	}
3966
3967	switch (zc->zc_share.z_sharetype) {
3968	case ZFS_SHARE_NFS:
3969	case ZFS_UNSHARE_NFS:
3970		if (error =
3971		    znfsexport_fs((void *)
3972		    (uintptr_t)zc->zc_share.z_exportdata))
3973			return (error);
3974		break;
3975	case ZFS_SHARE_SMB:
3976	case ZFS_UNSHARE_SMB:
3977		if (error = zsmbexport_fs((void *)
3978		    (uintptr_t)zc->zc_share.z_exportdata,
3979		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
3980		    B_TRUE: B_FALSE)) {
3981			return (error);
3982		}
3983		break;
3984	}
3985
3986	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
3987	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
3988	    SHAREFS_ADD : SHAREFS_REMOVE;
3989
3990	/*
3991	 * Add or remove share from sharetab
3992	 */
3993	error = zshare_fs(opcode,
3994	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
3995	    zc->zc_share.z_sharemax);
3996
3997	return (error);
3998
3999}
4000
/*
 * Single-entry ACL (ACE_ALL_PERMS with the ACE_EVERYONE flag) that
 * zfs_ioc_smb_acl() passes to VOP_CREATE() when it adds a share ACL file.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4004
4005/*
4006 * Remove all ACL files in shares dir
4007 */
4008static int
4009zfs_smb_acl_purge(znode_t *dzp)
4010{
4011	zap_cursor_t	zc;
4012	zap_attribute_t	zap;
4013	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4014	int error;
4015
4016	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4017	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4018	    zap_cursor_advance(&zc)) {
4019		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4020		    NULL, 0)) != 0)
4021			break;
4022	}
4023	zap_cursor_fini(&zc);
4024	return (error);
4025}
4026
4027static int
4028zfs_ioc_smb_acl(zfs_cmd_t *zc)
4029{
4030	vnode_t *vp;
4031	znode_t *dzp;
4032	vnode_t *resourcevp = NULL;
4033	znode_t *sharedir;
4034	zfsvfs_t *zfsvfs;
4035	nvlist_t *nvlist;
4036	char *src, *target;
4037	vattr_t vattr;
4038	vsecattr_t vsec;
4039	int error = 0;
4040
4041	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4042	    NO_FOLLOW, NULL, &vp)) != 0)
4043		return (error);
4044
4045	/* Now make sure mntpnt and dataset are ZFS */
4046
4047	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4048	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4049	    zc->zc_name) != 0)) {
4050		VN_RELE(vp);
4051		return (EINVAL);
4052	}
4053
4054	dzp = VTOZ(vp);
4055	zfsvfs = dzp->z_zfsvfs;
4056	ZFS_ENTER(zfsvfs);
4057
4058	/*
4059	 * Create share dir if its missing.
4060	 */
4061	mutex_enter(&zfsvfs->z_lock);
4062	if (zfsvfs->z_shares_dir == 0) {
4063		dmu_tx_t *tx;
4064
4065		tx = dmu_tx_create(zfsvfs->z_os);
4066		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4067		    ZFS_SHARES_DIR);
4068		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4069		error = dmu_tx_assign(tx, TXG_WAIT);
4070		if (error) {
4071			dmu_tx_abort(tx);
4072		} else {
4073			error = zfs_create_share_dir(zfsvfs, tx);
4074			dmu_tx_commit(tx);
4075		}
4076		if (error) {
4077			mutex_exit(&zfsvfs->z_lock);
4078			VN_RELE(vp);
4079			ZFS_EXIT(zfsvfs);
4080			return (error);
4081		}
4082	}
4083	mutex_exit(&zfsvfs->z_lock);
4084
4085	ASSERT(zfsvfs->z_shares_dir);
4086	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4087		VN_RELE(vp);
4088		ZFS_EXIT(zfsvfs);
4089		return (error);
4090	}
4091
4092	switch (zc->zc_cookie) {
4093	case ZFS_SMB_ACL_ADD:
4094		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4095		vattr.va_type = VREG;
4096		vattr.va_mode = S_IFREG|0777;
4097		vattr.va_uid = 0;
4098		vattr.va_gid = 0;
4099
4100		vsec.vsa_mask = VSA_ACE;
4101		vsec.vsa_aclentp = &full_access;
4102		vsec.vsa_aclentsz = sizeof (full_access);
4103		vsec.vsa_aclcnt = 1;
4104
4105		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4106		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4107		if (resourcevp)
4108			VN_RELE(resourcevp);
4109		break;
4110
4111	case ZFS_SMB_ACL_REMOVE:
4112		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4113		    NULL, 0);
4114		break;
4115
4116	case ZFS_SMB_ACL_RENAME:
4117		if ((error = get_nvlist(zc->zc_nvlist_src,
4118		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4119			VN_RELE(vp);
4120			ZFS_EXIT(zfsvfs);
4121			return (error);
4122		}
4123		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4124		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4125		    &target)) {
4126			VN_RELE(vp);
4127			VN_RELE(ZTOV(sharedir));
4128			ZFS_EXIT(zfsvfs);
4129			nvlist_free(nvlist);
4130			return (error);
4131		}
4132		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4133		    kcred, NULL, 0);
4134		nvlist_free(nvlist);
4135		break;
4136
4137	case ZFS_SMB_ACL_PURGE:
4138		error = zfs_smb_acl_purge(sharedir);
4139		break;
4140
4141	default:
4142		error = EINVAL;
4143		break;
4144	}
4145
4146	VN_RELE(vp);
4147	VN_RELE(ZTOV(sharedir));
4148
4149	ZFS_EXIT(zfsvfs);
4150
4151	return (error);
4152}
4153
4154/*
4155 * inputs:
4156 * zc_name	name of filesystem
4157 * zc_value	short name of snap
4158 * zc_string	user-supplied tag for this reference
4159 * zc_cookie	recursive flag
4160 * zc_temphold	set if hold is temporary
4161 *
4162 * outputs:		none
4163 */
4164static int
4165zfs_ioc_hold(zfs_cmd_t *zc)
4166{
4167	boolean_t recursive = zc->zc_cookie;
4168
4169	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4170		return (EINVAL);
4171
4172	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4173	    zc->zc_string, recursive, zc->zc_temphold));
4174}
4175
4176/*
4177 * inputs:
4178 * zc_name	name of dataset from which we're releasing a user reference
4179 * zc_value	short name of snap
4180 * zc_string	user-supplied tag for this reference
4181 * zc_cookie	recursive flag
4182 *
4183 * outputs:		none
4184 */
4185static int
4186zfs_ioc_release(zfs_cmd_t *zc)
4187{
4188	boolean_t recursive = zc->zc_cookie;
4189
4190	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4191		return (EINVAL);
4192
4193	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4194	    zc->zc_string, recursive));
4195}
4196
4197/*
4198 * inputs:
4199 * zc_name		name of filesystem
4200 *
4201 * outputs:
4202 * zc_nvlist_src{_size}	nvlist of snapshot holds
4203 */
4204static int
4205zfs_ioc_get_holds(zfs_cmd_t *zc)
4206{
4207	nvlist_t *nvp;
4208	int error;
4209
4210	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4211		error = put_nvlist(zc, nvp);
4212		nvlist_free(nvp);
4213	}
4214
4215	return (error);
4216}
4217
/*
 * Dispatch table for the /dev/zfs ioctls.  zfsdev_ioctl() indexes this
 * array with (cmd - ZFS_IOC), so the order of the entries must match the
 * ioctl numbering.
 *
 * Each entry gives the handler, the security-policy callback, the kind of
 * name carried in zc_name (POOL_NAME, DATASET_NAME or NO_NAME) and two
 * boolean flags.  NOTE(review): the flags presumably initialize
 * zvec_his_log and zvec_pool_check, in that order -- confirm against the
 * zfs_ioc_vec_t declaration.
 *
 * pool create, destroy, and export don't log the history as part of
 * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
 * do the logging of those commands.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
	    B_TRUE},
	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
	    B_TRUE, B_TRUE },
	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    DATASET_NAME, B_FALSE, B_TRUE },
	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE }
};
4325
4326int
4327pool_status_check(const char *name, zfs_ioc_namecheck_t type)
4328{
4329	spa_t *spa;
4330	int error;
4331
4332	ASSERT(type == POOL_NAME || type == DATASET_NAME);
4333
4334	error = spa_open(name, &spa, FTAG);
4335	if (error == 0) {
4336		if (spa_suspended(spa))
4337			error = EAGAIN;
4338		spa_close(spa, FTAG);
4339	}
4340	return (error);
4341}
4342
4343static int
4344zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
4345{
4346	zfs_cmd_t *zc;
4347	uint_t vec;
4348	int error, rc;
4349
4350	if (getminor(dev) != 0)
4351		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
4352
4353	vec = cmd - ZFS_IOC;
4354	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
4355
4356	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
4357		return (EINVAL);
4358
4359	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
4360
4361	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
4362	if (error != 0)
4363		error = EFAULT;
4364
4365	if ((error == 0) && !(flag & FKIOCTL))
4366		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
4367
4368	/*
4369	 * Ensure that all pool/dataset names are valid before we pass down to
4370	 * the lower layers.
4371	 */
4372	if (error == 0) {
4373		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4374		zc->zc_iflags = flag & FKIOCTL;
4375		switch (zfs_ioc_vec[vec].zvec_namecheck) {
4376		case POOL_NAME:
4377			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
4378				error = EINVAL;
4379			if (zfs_ioc_vec[vec].zvec_pool_check)
4380				error = pool_status_check(zc->zc_name,
4381				    zfs_ioc_vec[vec].zvec_namecheck);
4382			break;
4383
4384		case DATASET_NAME:
4385			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
4386				error = EINVAL;
4387			if (zfs_ioc_vec[vec].zvec_pool_check)
4388				error = pool_status_check(zc->zc_name,
4389				    zfs_ioc_vec[vec].zvec_namecheck);
4390			break;
4391
4392		case NO_NAME:
4393			break;
4394		}
4395	}
4396
4397	if (error == 0)
4398		error = zfs_ioc_vec[vec].zvec_func(zc);
4399
4400	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
4401	if (error == 0) {
4402		if (rc != 0)
4403			error = EFAULT;
4404		if (zfs_ioc_vec[vec].zvec_his_log)
4405			zfs_log_history(zc);
4406	}
4407
4408	kmem_free(zc, sizeof (zfs_cmd_t));
4409	return (error);
4410}
4411
4412static int
4413zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4414{
4415	if (cmd != DDI_ATTACH)
4416		return (DDI_FAILURE);
4417
4418	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
4419	    DDI_PSEUDO, 0) == DDI_FAILURE)
4420		return (DDI_FAILURE);
4421
4422	zfs_dip = dip;
4423
4424	ddi_report_dev(dip);
4425
4426	return (DDI_SUCCESS);
4427}
4428
4429static int
4430zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4431{
4432	if (spa_busy() || zfs_busy() || zvol_busy())
4433		return (DDI_FAILURE);
4434
4435	if (cmd != DDI_DETACH)
4436		return (DDI_FAILURE);
4437
4438	zfs_dip = NULL;
4439
4440	ddi_prop_remove_all(dip);
4441	ddi_remove_minor_node(dip, NULL);
4442
4443	return (DDI_SUCCESS);
4444}
4445
4446/*ARGSUSED*/
4447static int
4448zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
4449{
4450	switch (infocmd) {
4451	case DDI_INFO_DEVT2DEVINFO:
4452		*result = zfs_dip;
4453		return (DDI_SUCCESS);
4454
4455	case DDI_INFO_DEVT2INSTANCE:
4456		*result = (void *)0;
4457		return (DDI_SUCCESS);
4458	}
4459
4460	return (DDI_FAILURE);
4461}
4462
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * The ioctl slot is the exception: it points at zfsdev_ioctl(), which
 * itself forwards requests for nonzero minors to zvol_ioctl().
 */
static struct cb_ops zfs_cb_ops = {
	zvol_open,	/* open */
	zvol_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
4492
/*
 * Device operations for the zfs driver: ties zfs_info(), zfs_attach() and
 * zfs_detach() to the character/block entry points in zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
4507
/* Driver linkage: module type, description string and device operations. */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
4513
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info().
 * It lists zfs_modlfs (defined outside this section) and the driver
 * linkage above, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
4520
4521
/* Thread-specific-data keys; both are created by tsd_create() in _init(). */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
4524
4525int
4526_init(void)
4527{
4528	int error;
4529
4530	spa_init(FREAD | FWRITE);
4531	zfs_init();
4532	zvol_init();
4533
4534	if ((error = mod_install(&modlinkage)) != 0) {
4535		zvol_fini();
4536		zfs_fini();
4537		spa_fini();
4538		return (error);
4539	}
4540
4541	tsd_create(&zfs_fsyncer_key, NULL);
4542	tsd_create(&rrw_tsd_key, NULL);
4543
4544	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
4545	ASSERT(error == 0);
4546	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
4547
4548	return (0);
4549}
4550
4551int
4552_fini(void)
4553{
4554	int error;
4555
4556	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
4557		return (EBUSY);
4558
4559	if ((error = mod_remove(&modlinkage)) != 0)
4560		return (error);
4561
4562	zvol_fini();
4563	zfs_fini();
4564	spa_fini();
4565	if (zfs_nfsshare_inited)
4566		(void) ddi_modclose(nfs_mod);
4567	if (zfs_smbshare_inited)
4568		(void) ddi_modclose(smbsrv_mod);
4569	if (zfs_nfsshare_inited || zfs_smbshare_inited)
4570		(void) ddi_modclose(sharefs_mod);
4571
4572	tsd_destroy(&zfs_fsyncer_key);
4573	ldi_ident_release(zfs_li);
4574	zfs_li = NULL;
4575	mutex_destroy(&zfs_share_lock);
4576
4577	return (error);
4578}
4579
4580int
4581_info(struct modinfo *modinfop)
4582{
4583	return (mod_info(&modlinkage, modinfop));
4584}
4585