1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 * Copyright (c) 2017 Datto Inc.
26 * Copyright 2017 RackTop Systems.
27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
28 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
29 */
30
31/*
32 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
33 * It has the following characteristics:
34 *
35 *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
36 *  threads.  This is accomplished primarily by avoiding global data
37 *  (e.g. caching).  Since it's thread-safe, there is no reason for a
38 *  process to have multiple libzfs "instances".  Therefore, we store
39 *  our few pieces of data (e.g. the file descriptor) in global
40 *  variables.  The fd is reference-counted so that the libzfs_core
41 *  library can be "initialized" multiple times (e.g. by different
42 *  consumers within the same process).
43 *
44 *  - Committed Interface.  The libzfs_core interface will be committed,
45 *  therefore consumers can compile against it and be confident that
46 *  their code will continue to work on future releases of this code.
47 *  Currently, the interface is Evolving (not Committed), but we intend
48 *  to commit to it once it is more complete and we determine that it
49 *  meets the needs of all consumers.
50 *
51 *  - Programmatic Error Handling.  libzfs_core communicates errors with
52 *  defined error numbers, and doesn't print anything to stdout/stderr.
53 *
54 *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
55 *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
56 *  between libzfs_core functions and ioctls to ZFS_DEV.
57 *
58 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
60 *  libzfs_core function is atomic.  For example, creating multiple
61 *  snapshots with a single call to lzc_snapshot() is atomic -- it
62 *  can't fail with only some of the requested snapshots created, even
63 *  in the event of power loss or system crash.
64 *
65 *  - Continued libzfs Support.  Some higher-level operations (e.g.
66 *  support for "zfs send -R") are too complicated to fit the scope of
67 *  libzfs_core.  This functionality will continue to live in libzfs.
68 *  Where appropriate, libzfs will use the underlying atomic operations
69 *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
70 *  zfs receive" by using individual "send one snapshot", rename,
71 *  destroy, and "receive one snapshot" operations in libzfs_core.
72 *  /sbin/zfs and /sbin/zpool will link with both libzfs and
73 *  libzfs_core.  Other consumers should aim to use only libzfs_core,
74 *  since that will be the supported, stable interface going forwards.
75 */
76
77#include <libzfs_core.h>
78#include <ctype.h>
79#include <unistd.h>
80#include <stdlib.h>
81#include <string.h>
82#ifdef ZFS_DEBUG
83#include <stdio.h>
84#endif
85#include <errno.h>
86#include <fcntl.h>
87#include <pthread.h>
88#include <libzutil.h>
89#include <sys/nvpair.h>
90#include <sys/param.h>
91#include <sys/types.h>
92#include <sys/stat.h>
93#include <sys/zfs_ioctl.h>
94
/* Descriptor for ZFS_DEV shared by the whole process; -1 while closed. */
static int g_fd = -1;
/* Held by libzfs_core_init() and libzfs_core_fini() while they run. */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
/* Count of libzfs_core_init() calls not yet undone by libzfs_core_fini(). */
static int g_refcount;
98
#ifdef ZFS_DEBUG
/* ioctl to fail on purpose; ZFS_IOC_LAST means "no override configured". */
static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
/* Error returned by lzc_ioctl() in place of issuing fail_ioc_cmd. */
static zfs_errno_t fail_ioc_err;

/*
 * Read the ZFS_IOC_TEST environment variable and, if it is well formed,
 * record which ioctl should be failed and with which error.
 *
 * This exists to test newer user space binaries against kernels that
 * do not yet support an ioctl or a new ioctl argument.
 *
 * USAGE:
 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err".
 *
 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
 *
 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
 * cannot checkpoint 'tank': the loaded zfs module does not support
 * this operation. A reboot may be required to enable this operation.
 */
static void
libzfs_core_debug_ioc(void)
{
	const char *spec;
	unsigned int cmd = 0, err = 0;

	/* An override is already recorded; keep it. */
	if (fail_ioc_cmd != ZFS_IOC_LAST)
		return;

	spec = getenv("ZFS_IOC_TEST");
	if (spec == NULL)
		return;

	/* Both fields must parse; %i accepts decimal, octal and hex. */
	if (sscanf(spec, "%i:%i", &cmd, &err) != 2)
		return;

	/* Ignore command numbers at or beyond ZFS_IOC_LAST. */
	if (cmd >= ZFS_IOC_LAST)
		return;

	fail_ioc_cmd = cmd;
	fail_ioc_err = err;
}
#endif
134
135int
136libzfs_core_init(void)
137{
138	(void) pthread_mutex_lock(&g_lock);
139	if (g_refcount == 0) {
140		g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC);
141		if (g_fd < 0) {
142			(void) pthread_mutex_unlock(&g_lock);
143			return (errno);
144		}
145	}
146	g_refcount++;
147
148#ifdef ZFS_DEBUG
149	libzfs_core_debug_ioc();
150#endif
151	(void) pthread_mutex_unlock(&g_lock);
152	return (0);
153}
154
155void
156libzfs_core_fini(void)
157{
158	(void) pthread_mutex_lock(&g_lock);
159	ASSERT3S(g_refcount, >, 0);
160
161	if (g_refcount > 0)
162		g_refcount--;
163
164	if (g_refcount == 0 && g_fd != -1) {
165		(void) close(g_fd);
166		g_fd = -1;
167	}
168	(void) pthread_mutex_unlock(&g_lock);
169}
170
171static int
172lzc_ioctl(zfs_ioc_t ioc, const char *name,
173    nvlist_t *source, nvlist_t **resultp)
174{
175	zfs_cmd_t zc = {"\0"};
176	int error = 0;
177	char *packed = NULL;
178	size_t size = 0;
179
180	ASSERT3S(g_refcount, >, 0);
181	VERIFY3S(g_fd, !=, -1);
182
183#ifdef ZFS_DEBUG
184	if (ioc == fail_ioc_cmd)
185		return (fail_ioc_err);
186#endif
187
188	if (name != NULL)
189		(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
190
191	if (source != NULL) {
192		packed = fnvlist_pack(source, &size);
193		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
194		zc.zc_nvlist_src_size = size;
195	}
196
197	if (resultp != NULL) {
198		*resultp = NULL;
199		if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
200			zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
201			    ZCP_ARG_MEMLIMIT);
202		} else {
203			zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
204		}
205		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
206		    malloc(zc.zc_nvlist_dst_size);
207		if (zc.zc_nvlist_dst == (uint64_t)0) {
208			error = ENOMEM;
209			goto out;
210		}
211	}
212
213	while (zfs_ioctl_fd(g_fd, ioc, &zc) != 0) {
214		/*
215		 * If ioctl exited with ENOMEM, we retry the ioctl after
216		 * increasing the size of the destination nvlist.
217		 *
218		 * Channel programs that exit with ENOMEM ran over the
219		 * lua memory sandbox; they should not be retried.
220		 */
221		if (errno == ENOMEM && resultp != NULL &&
222		    ioc != ZFS_IOC_CHANNEL_PROGRAM) {
223			free((void *)(uintptr_t)zc.zc_nvlist_dst);
224			zc.zc_nvlist_dst_size *= 2;
225			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
226			    malloc(zc.zc_nvlist_dst_size);
227			if (zc.zc_nvlist_dst == (uint64_t)0) {
228				error = ENOMEM;
229				goto out;
230			}
231		} else {
232			error = errno;
233			break;
234		}
235	}
236	if (zc.zc_nvlist_dst_filled) {
237		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
238		    zc.zc_nvlist_dst_size);
239	}
240
241out:
242	if (packed != NULL)
243		fnvlist_pack_free(packed, size);
244	free((void *)(uintptr_t)zc.zc_nvlist_dst);
245	return (error);
246}
247
248int
249lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
250    uint8_t *wkeydata, uint_t wkeylen)
251{
252	int error;
253	nvlist_t *hidden_args = NULL;
254	nvlist_t *args = fnvlist_alloc();
255
256	fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
257	if (props != NULL)
258		fnvlist_add_nvlist(args, "props", props);
259
260	if (wkeydata != NULL) {
261		hidden_args = fnvlist_alloc();
262		fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
263		    wkeylen);
264		fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
265	}
266
267	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
268	nvlist_free(hidden_args);
269	nvlist_free(args);
270	return (error);
271}
272
273int
274lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
275{
276	int error;
277	nvlist_t *hidden_args = NULL;
278	nvlist_t *args = fnvlist_alloc();
279
280	fnvlist_add_string(args, "origin", origin);
281	if (props != NULL)
282		fnvlist_add_nvlist(args, "props", props);
283	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
284	nvlist_free(hidden_args);
285	nvlist_free(args);
286	return (error);
287}
288
289int
290lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
291{
292	/*
293	 * The promote ioctl is still legacy, so we need to construct our
294	 * own zfs_cmd_t rather than using lzc_ioctl().
295	 */
296	zfs_cmd_t zc = {"\0"};
297
298	ASSERT3S(g_refcount, >, 0);
299	VERIFY3S(g_fd, !=, -1);
300
301	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
302	if (zfs_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
303		int error = errno;
304		if (error == EEXIST && snapnamebuf != NULL)
305			(void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
306		return (error);
307	}
308	return (0);
309}
310
311int
312lzc_rename(const char *source, const char *target)
313{
314	zfs_cmd_t zc = {"\0"};
315	int error;
316
317	ASSERT3S(g_refcount, >, 0);
318	VERIFY3S(g_fd, !=, -1);
319	(void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
320	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
321	error = zfs_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc);
322	if (error != 0)
323		error = errno;
324	return (error);
325}
326int
327lzc_destroy(const char *fsname)
328{
329	int error;
330	nvlist_t *args = fnvlist_alloc();
331	error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
332	nvlist_free(args);
333	return (error);
334}
335
336/*
337 * Creates snapshots.
338 *
339 * The keys in the snaps nvlist are the snapshots to be created.
340 * They must all be in the same pool.
341 *
342 * The props nvlist is properties to set.  Currently only user properties
343 * are supported.  { user:prop_name -> string value }
344 *
345 * The returned results nvlist will have an entry for each snapshot that failed.
346 * The value will be the (int32) error code.
347 *
348 * The return value will be 0 if all snapshots were created, otherwise it will
349 * be the errno of a (unspecified) snapshot that failed.
350 */
351int
352lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
353{
354	nvpair_t *elem;
355	nvlist_t *args;
356	int error;
357	char pool[ZFS_MAX_DATASET_NAME_LEN];
358
359	*errlist = NULL;
360
361	/* determine the pool name */
362	elem = nvlist_next_nvpair(snaps, NULL);
363	if (elem == NULL)
364		return (0);
365	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
366	pool[strcspn(pool, "/@")] = '\0';
367
368	args = fnvlist_alloc();
369	fnvlist_add_nvlist(args, "snaps", snaps);
370	if (props != NULL)
371		fnvlist_add_nvlist(args, "props", props);
372
373	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
374	nvlist_free(args);
375
376	return (error);
377}
378
379/*
380 * Destroys snapshots.
381 *
382 * The keys in the snaps nvlist are the snapshots to be destroyed.
383 * They must all be in the same pool.
384 *
385 * Snapshots that do not exist will be silently ignored.
386 *
387 * If 'defer' is not set, and a snapshot has user holds or clones, the
388 * destroy operation will fail and none of the snapshots will be
389 * destroyed.
390 *
391 * If 'defer' is set, and a snapshot has user holds or clones, it will be
392 * marked for deferred destruction, and will be destroyed when the last hold
393 * or clone is removed/destroyed.
394 *
395 * The return value will be 0 if all snapshots were destroyed (or marked for
396 * later destruction if 'defer' is set) or didn't exist to begin with.
397 *
398 * Otherwise the return value will be the errno of a (unspecified) snapshot
399 * that failed, no snapshots will be destroyed, and the errlist will have an
400 * entry for each snapshot that failed.  The value in the errlist will be
401 * the (int32) error code.
402 */
403int
404lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
405{
406	nvpair_t *elem;
407	nvlist_t *args;
408	int error;
409	char pool[ZFS_MAX_DATASET_NAME_LEN];
410
411	/* determine the pool name */
412	elem = nvlist_next_nvpair(snaps, NULL);
413	if (elem == NULL)
414		return (0);
415	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
416	pool[strcspn(pool, "/@")] = '\0';
417
418	args = fnvlist_alloc();
419	fnvlist_add_nvlist(args, "snaps", snaps);
420	if (defer)
421		fnvlist_add_boolean(args, "defer");
422
423	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
424	nvlist_free(args);
425
426	return (error);
427}
428
429int
430lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
431    uint64_t *usedp)
432{
433	nvlist_t *args;
434	nvlist_t *result;
435	int err;
436	char fs[ZFS_MAX_DATASET_NAME_LEN];
437	char *atp;
438
439	/* determine the fs name */
440	(void) strlcpy(fs, firstsnap, sizeof (fs));
441	atp = strchr(fs, '@');
442	if (atp == NULL)
443		return (EINVAL);
444	*atp = '\0';
445
446	args = fnvlist_alloc();
447	fnvlist_add_string(args, "firstsnap", firstsnap);
448
449	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
450	nvlist_free(args);
451	if (err == 0)
452		*usedp = fnvlist_lookup_uint64(result, "used");
453	fnvlist_free(result);
454
455	return (err);
456}
457
458boolean_t
459lzc_exists(const char *dataset)
460{
461	/*
462	 * The objset_stats ioctl is still legacy, so we need to construct our
463	 * own zfs_cmd_t rather than using lzc_ioctl().
464	 */
465	zfs_cmd_t zc = {"\0"};
466
467	ASSERT3S(g_refcount, >, 0);
468	VERIFY3S(g_fd, !=, -1);
469
470	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
471	return (zfs_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
472}
473
474/*
475 * outnvl is unused.
476 * It was added to preserve the function signature in case it is
477 * needed in the future.
478 */
479/*ARGSUSED*/
480int
481lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
482{
483	return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
484}
485
486/*
487 * Create "user holds" on snapshots.  If there is a hold on a snapshot,
488 * the snapshot can not be destroyed.  (However, it can be marked for deletion
489 * by lzc_destroy_snaps(defer=B_TRUE).)
490 *
491 * The keys in the nvlist are snapshot names.
492 * The snapshots must all be in the same pool.
493 * The value is the name of the hold (string type).
494 *
495 * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
496 * In this case, when the cleanup_fd is closed (including on process
497 * termination), the holds will be released.  If the system is shut down
498 * uncleanly, the holds will be released when the pool is next opened
499 * or imported.
500 *
501 * Holds for snapshots which don't exist will be skipped and have an entry
502 * added to errlist, but will not cause an overall failure.
503 *
504 * The return value will be 0 if all holds, for snapshots that existed,
505 * were successfully created.
506 *
507 * Otherwise the return value will be the errno of a (unspecified) hold that
508 * failed and no holds will be created.
509 *
510 * In all cases the errlist will have an entry for each hold that failed
511 * (name = snapshot), with its value being the error code (int32).
512 */
513int
514lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
515{
516	char pool[ZFS_MAX_DATASET_NAME_LEN];
517	nvlist_t *args;
518	nvpair_t *elem;
519	int error;
520
521	/* determine the pool name */
522	elem = nvlist_next_nvpair(holds, NULL);
523	if (elem == NULL)
524		return (0);
525	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
526	pool[strcspn(pool, "/@")] = '\0';
527
528	args = fnvlist_alloc();
529	fnvlist_add_nvlist(args, "holds", holds);
530	if (cleanup_fd != -1)
531		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
532
533	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
534	nvlist_free(args);
535	return (error);
536}
537
538/*
539 * Release "user holds" on snapshots.  If the snapshot has been marked for
540 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
541 * any clones, and all the user holds are removed, then the snapshot will be
542 * destroyed.
543 *
544 * The keys in the nvlist are snapshot names.
545 * The snapshots must all be in the same pool.
546 * The value is an nvlist whose keys are the holds to remove.
547 *
548 * Holds which failed to release because they didn't exist will have an entry
549 * added to errlist, but will not cause an overall failure.
550 *
551 * The return value will be 0 if the nvl holds was empty or all holds that
552 * existed, were successfully removed.
553 *
554 * Otherwise the return value will be the errno of a (unspecified) hold that
555 * failed to release and no holds will be released.
556 *
 * In all cases the errlist will have an entry for each hold that failed
 * to release.
559 */
560int
561lzc_release(nvlist_t *holds, nvlist_t **errlist)
562{
563	char pool[ZFS_MAX_DATASET_NAME_LEN];
564	nvpair_t *elem;
565
566	/* determine the pool name */
567	elem = nvlist_next_nvpair(holds, NULL);
568	if (elem == NULL)
569		return (0);
570	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
571	pool[strcspn(pool, "/@")] = '\0';
572
573	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
574}
575
576/*
577 * Retrieve list of user holds on the specified snapshot.
578 *
579 * On success, *holdsp will be set to an nvlist which the caller must free.
580 * The keys are the names of the holds, and the value is the creation time
581 * of the hold (uint64) in seconds since the epoch.
582 */
583int
584lzc_get_holds(const char *snapname, nvlist_t **holdsp)
585{
586	return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
587}
588
589/*
590 * Generate a zfs send stream for the specified snapshot and write it to
591 * the specified file descriptor.
592 *
593 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
594 *
595 * If "from" is NULL, a full (non-incremental) stream will be sent.
596 * If "from" is non-NULL, it must be the full name of a snapshot or
597 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
598 * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
599 * bookmark must represent an earlier point in the history of "snapname").
600 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
601 * or it can be the origin of "snapname"'s filesystem, or an earlier
602 * snapshot in the origin, etc.
603 *
604 * "fd" is the file descriptor to write the send stream to.
605 *
606 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
607 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
608 * records with drr_blksz > 128K.
609 *
610 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
611 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
612 * which the receiving system must support (as indicated by support
613 * for the "embedded_data" feature).
614 *
615 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
616 * compressed WRITE records for blocks which are compressed on disk and in
617 * memory.  If the lz4_compress feature is active on the sending system, then
618 * the receiving system must have that feature enabled as well.
619 *
620 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
621 * datasets, by sending data exactly as it exists on disk.  This allows backups
622 * to be taken even if encryption keys are not currently loaded.
623 */
624int
625lzc_send(const char *snapname, const char *from, int fd,
626    enum lzc_send_flags flags)
627{
628	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
629	    NULL));
630}
631
632int
633lzc_send_redacted(const char *snapname, const char *from, int fd,
634    enum lzc_send_flags flags, const char *redactbook)
635{
636	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
637	    redactbook));
638}
639
640int
641lzc_send_resume(const char *snapname, const char *from, int fd,
642    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
643{
644	return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
645	    resumeoff, NULL));
646}
647
648/*
649 * snapname: The name of the "tosnap", or the snapshot whose contents we are
650 * sending.
651 * from: The name of the "fromsnap", or the incremental source.
652 * fd: File descriptor to write the stream to.
653 * flags: flags that determine features to be used by the stream.
654 * resumeobj: Object to resume from, for resuming send
655 * resumeoff: Offset to resume from, for resuming send.
656 * redactnv: nvlist of string -> boolean(ignored) containing the names of all
657 * the snapshots that we should redact with respect to.
658 * redactbook: Name of the redaction bookmark to create.
659 */
660int
661lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
662    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
663    const char *redactbook)
664{
665	nvlist_t *args;
666	int err;
667
668	args = fnvlist_alloc();
669	fnvlist_add_int32(args, "fd", fd);
670	if (from != NULL)
671		fnvlist_add_string(args, "fromsnap", from);
672	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
673		fnvlist_add_boolean(args, "largeblockok");
674	if (flags & LZC_SEND_FLAG_EMBED_DATA)
675		fnvlist_add_boolean(args, "embedok");
676	if (flags & LZC_SEND_FLAG_COMPRESS)
677		fnvlist_add_boolean(args, "compressok");
678	if (flags & LZC_SEND_FLAG_RAW)
679		fnvlist_add_boolean(args, "rawok");
680	if (flags & LZC_SEND_FLAG_SAVED)
681		fnvlist_add_boolean(args, "savedok");
682	if (resumeobj != 0 || resumeoff != 0) {
683		fnvlist_add_uint64(args, "resume_object", resumeobj);
684		fnvlist_add_uint64(args, "resume_offset", resumeoff);
685	}
686	if (redactbook != NULL)
687		fnvlist_add_string(args, "redactbook", redactbook);
688
689	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
690	nvlist_free(args);
691	return (err);
692}
693
694/*
695 * "from" can be NULL, a snapshot, or a bookmark.
696 *
697 * If from is NULL, a full (non-incremental) stream will be estimated.  This
698 * is calculated very efficiently.
699 *
700 * If from is a snapshot, lzc_send_space uses the deadlists attached to
701 * each snapshot to efficiently estimate the stream size.
702 *
703 * If from is a bookmark, the indirect blocks in the destination snapshot
704 * are traversed, looking for blocks with a birth time since the creation TXG of
705 * the snapshot this bookmark was created from.  This will result in
706 * significantly more I/O and be less efficient than a send space estimation on
707 * an equivalent snapshot. This process is also used if redact_snaps is
708 * non-null.
709 */
710int
711lzc_send_space_resume_redacted(const char *snapname, const char *from,
712    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
713    uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
714{
715	nvlist_t *args;
716	nvlist_t *result;
717	int err;
718
719	args = fnvlist_alloc();
720	if (from != NULL)
721		fnvlist_add_string(args, "from", from);
722	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
723		fnvlist_add_boolean(args, "largeblockok");
724	if (flags & LZC_SEND_FLAG_EMBED_DATA)
725		fnvlist_add_boolean(args, "embedok");
726	if (flags & LZC_SEND_FLAG_COMPRESS)
727		fnvlist_add_boolean(args, "compressok");
728	if (flags & LZC_SEND_FLAG_RAW)
729		fnvlist_add_boolean(args, "rawok");
730	if (resumeobj != 0 || resumeoff != 0) {
731		fnvlist_add_uint64(args, "resume_object", resumeobj);
732		fnvlist_add_uint64(args, "resume_offset", resumeoff);
733		fnvlist_add_uint64(args, "bytes", resume_bytes);
734	}
735	if (redactbook != NULL)
736		fnvlist_add_string(args, "redactbook", redactbook);
737	if (fd != -1)
738		fnvlist_add_int32(args, "fd", fd);
739
740	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
741	nvlist_free(args);
742	if (err == 0)
743		*spacep = fnvlist_lookup_uint64(result, "space");
744	nvlist_free(result);
745	return (err);
746}
747
748int
749lzc_send_space(const char *snapname, const char *from,
750    enum lzc_send_flags flags, uint64_t *spacep)
751{
752	return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
753	    NULL, -1, spacep));
754}
755
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short
 * reads.
 *
 * Returns 0 once the buffer is full.  Returns EIO if read() fails or
 * if end-of-file is reached before 'ilen' bytes have arrived.  A
 * request for zero bytes succeeds without touching the descriptor.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		/* Never advance the cursor by a negative error return. */
		if (rv < 0)
			return (EIO);
		/* End of file: stop and let the length check decide. */
		if (rv == 0)
			break;
		cp += rv;
		len -= (int)rv;
	}

	if (len != 0)
		return (EIO);

	return (0);
}
774
775/*
776 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
777 * legacy ZFS_IOC_RECV user/kernel interface.  The new interface supports all
778 * stream options but is currently only used for resumable streams.  This way
779 * updated user space utilities will interoperate with older kernel modules.
780 *
781 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
782 */
783static int
784recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
785    uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
786    boolean_t resumable, boolean_t raw, int input_fd,
787    const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
788    uint64_t *errflags, nvlist_t **errors)
789{
790	dmu_replay_record_t drr;
791	char fsname[MAXPATHLEN];
792	char *atp;
793	int error;
794	boolean_t payload = B_FALSE;
795
796	ASSERT3S(g_refcount, >, 0);
797	VERIFY3S(g_fd, !=, -1);
798
799	/* Set 'fsname' to the name of containing filesystem */
800	(void) strlcpy(fsname, snapname, sizeof (fsname));
801	atp = strchr(fsname, '@');
802	if (atp == NULL)
803		return (EINVAL);
804	*atp = '\0';
805
806	/* If the fs does not exist, try its parent. */
807	if (!lzc_exists(fsname)) {
808		char *slashp = strrchr(fsname, '/');
809		if (slashp == NULL)
810			return (ENOENT);
811		*slashp = '\0';
812	}
813
814	/*
815	 * The begin_record is normally a non-byteswapped BEGIN record.
816	 * For resumable streams it may be set to any non-byteswapped
817	 * dmu_replay_record_t.
818	 */
819	if (begin_record == NULL) {
820		error = recv_read(input_fd, &drr, sizeof (drr));
821		if (error != 0)
822			return (error);
823	} else {
824		drr = *begin_record;
825		payload = (begin_record->drr_payloadlen != 0);
826	}
827
828	/*
829	 * All receives with a payload should use the new interface.
830	 */
831	if (resumable || raw || wkeydata != NULL || payload) {
832		nvlist_t *outnvl = NULL;
833		nvlist_t *innvl = fnvlist_alloc();
834
835		fnvlist_add_string(innvl, "snapname", snapname);
836
837		if (recvdprops != NULL)
838			fnvlist_add_nvlist(innvl, "props", recvdprops);
839
840		if (localprops != NULL)
841			fnvlist_add_nvlist(innvl, "localprops", localprops);
842
843		if (wkeydata != NULL) {
844			/*
845			 * wkeydata must be placed in the special
846			 * ZPOOL_HIDDEN_ARGS nvlist so that it
847			 * will not be printed to the zpool history.
848			 */
849			nvlist_t *hidden_args = fnvlist_alloc();
850			fnvlist_add_uint8_array(hidden_args, "wkeydata",
851			    wkeydata, wkeylen);
852			fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
853			    hidden_args);
854			nvlist_free(hidden_args);
855		}
856
857		if (origin != NULL && strlen(origin))
858			fnvlist_add_string(innvl, "origin", origin);
859
860		fnvlist_add_byte_array(innvl, "begin_record",
861		    (uchar_t *)&drr, sizeof (drr));
862
863		fnvlist_add_int32(innvl, "input_fd", input_fd);
864
865		if (force)
866			fnvlist_add_boolean(innvl, "force");
867
868		if (resumable)
869			fnvlist_add_boolean(innvl, "resumable");
870
871
872		error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
873
874		if (error == 0 && read_bytes != NULL)
875			error = nvlist_lookup_uint64(outnvl, "read_bytes",
876			    read_bytes);
877
878		if (error == 0 && errflags != NULL)
879			error = nvlist_lookup_uint64(outnvl, "error_flags",
880			    errflags);
881
882		if (error == 0 && errors != NULL) {
883			nvlist_t *nvl;
884			error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
885			if (error == 0)
886				*errors = fnvlist_dup(nvl);
887		}
888
889		fnvlist_free(innvl);
890		fnvlist_free(outnvl);
891	} else {
892		zfs_cmd_t zc = {"\0"};
893		char *packed = NULL;
894		size_t size;
895
896		ASSERT3S(g_refcount, >, 0);
897
898		(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
899		(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
900
901		if (recvdprops != NULL) {
902			packed = fnvlist_pack(recvdprops, &size);
903			zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
904			zc.zc_nvlist_src_size = size;
905		}
906
907		if (localprops != NULL) {
908			packed = fnvlist_pack(localprops, &size);
909			zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
910			zc.zc_nvlist_conf_size = size;
911		}
912
913		if (origin != NULL)
914			(void) strlcpy(zc.zc_string, origin,
915			    sizeof (zc.zc_string));
916
917		ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
918		zc.zc_begin_record = drr.drr_u.drr_begin;
919		zc.zc_guid = force;
920		zc.zc_cookie = input_fd;
921		zc.zc_cleanup_fd = -1;
922		zc.zc_action_handle = 0;
923
924		zc.zc_nvlist_dst_size = 128 * 1024;
925		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
926		    malloc(zc.zc_nvlist_dst_size);
927
928		error = zfs_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc);
929		if (error != 0) {
930			error = errno;
931		} else {
932			if (read_bytes != NULL)
933				*read_bytes = zc.zc_cookie;
934
935			if (errflags != NULL)
936				*errflags = zc.zc_obj;
937
938			if (errors != NULL)
939				VERIFY0(nvlist_unpack(
940				    (void *)(uintptr_t)zc.zc_nvlist_dst,
941				    zc.zc_nvlist_dst_size, errors, KM_SLEEP));
942		}
943
944		if (packed != NULL)
945			fnvlist_pack_free(packed, size);
946		free((void *)(uintptr_t)zc.zc_nvlist_dst);
947	}
948
949	return (error);
950}
951
952/*
953 * The simplest receive case: receive from the specified fd, creating the
954 * specified snapshot.  Apply the specified properties as "received" properties
955 * (which can be overridden by locally-set properties).  If the stream is a
956 * clone, its origin snapshot must be specified by 'origin'.  The 'force'
957 * flag will cause the target filesystem to be rolled back or destroyed if
958 * necessary to receive.
959 *
960 * Return 0 on success or an errno on failure.
961 *
962 * Note: this interface does not work on dedup'd streams
963 * (those with DMU_BACKUP_FEATURE_DEDUP).
964 */
965int
966lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
967    boolean_t force, boolean_t raw, int fd)
968{
969	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
970	    B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
971}
972
973/*
974 * Like lzc_receive, but if the receive fails due to premature stream
975 * termination, the intermediate state will be preserved on disk.  In this
976 * case, ECKSUM will be returned.  The receive may subsequently be resumed
977 * with a resuming send stream generated by lzc_send_resume().
978 */
979int
980lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
981    boolean_t force, boolean_t raw, int fd)
982{
983	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
984	    B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
985}
986
987/*
988 * Like lzc_receive, but allows the caller to read the begin record and then to
989 * pass it in.  That could be useful if the caller wants to derive, for example,
990 * the snapname or the origin parameters based on the information contained in
991 * the begin record.
992 * The begin record must be in its original form as read from the stream,
993 * in other words, it should not be byteswapped.
994 *
 * The 'resumable' parameter selects the same behavior as
 * lzc_receive_resumable.
997 */
998int
999lzc_receive_with_header(const char *snapname, nvlist_t *props,
1000    const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1001    int fd, const dmu_replay_record_t *begin_record)
1002{
1003	if (begin_record == NULL)
1004		return (EINVAL);
1005
1006	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1007	    resumable, raw, fd, begin_record, NULL, NULL, NULL));
1008}
1009
1010/*
1011 * Like lzc_receive, but allows the caller to pass all supported arguments
1012 * and retrieve all values returned.  The only additional input parameter
1013 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
1014 *
1015 * The following parameters all provide return values.  Several may be set
1016 * in the failure case and will contain additional information.
1017 *
1018 * The 'read_bytes' value will be set to the total number of bytes read.
1019 *
1020 * The 'errflags' value will contain zprop_errflags_t flags which are
1021 * used to describe any failures.
1022 *
1023 * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
1024 *
1025 * The 'errors' nvlist contains an entry for each unapplied received
1026 * property.  Callers are responsible for freeing this nvlist.
1027 */
1028int lzc_receive_one(const char *snapname, nvlist_t *props,
1029    const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1030    int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
1031    uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1032    nvlist_t **errors)
1033{
1034	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1035	    resumable, raw, input_fd, begin_record,
1036	    read_bytes, errflags, errors));
1037}
1038
1039/*
1040 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1041 * argument.
1042 *
1043 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1044 * exclude ('zfs receive -x') properties. Callers are responsible for freeing
1045 * this nvlist
1046 */
1047int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
1048    nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
1049    boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
1050    const dmu_replay_record_t *begin_record, int cleanup_fd,
1051    uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1052    nvlist_t **errors)
1053{
1054	return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
1055	    force, resumable, raw, input_fd, begin_record,
1056	    read_bytes, errflags, errors));
1057}
1058
1059/*
1060 * Roll back this filesystem or volume to its most recent snapshot.
1061 * If snapnamebuf is not NULL, it will be filled in with the name
1062 * of the most recent snapshot.
1063 * Note that the latest snapshot may change if a new one is concurrently
1064 * created or the current one is destroyed.  lzc_rollback_to can be used
1065 * to roll back to a specific latest snapshot.
1066 *
1067 * Return 0 on success or an errno on failure.
1068 */
1069int
1070lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
1071{
1072	nvlist_t *args;
1073	nvlist_t *result;
1074	int err;
1075
1076	args = fnvlist_alloc();
1077	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1078	nvlist_free(args);
1079	if (err == 0 && snapnamebuf != NULL) {
1080		const char *snapname = fnvlist_lookup_string(result, "target");
1081		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
1082	}
1083	nvlist_free(result);
1084
1085	return (err);
1086}
1087
1088/*
1089 * Roll back this filesystem or volume to the specified snapshot,
1090 * if possible.
1091 *
1092 * Return 0 on success or an errno on failure.
1093 */
1094int
1095lzc_rollback_to(const char *fsname, const char *snapname)
1096{
1097	nvlist_t *args;
1098	nvlist_t *result;
1099	int err;
1100
1101	args = fnvlist_alloc();
1102	fnvlist_add_string(args, "target", snapname);
1103	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1104	nvlist_free(args);
1105	nvlist_free(result);
1106	return (err);
1107}
1108
1109/*
1110 * Creates new bookmarks from existing snapshot or bookmark.
1111 *
1112 * The bookmarks nvlist maps from the full name of the new bookmark to
1113 * the full name of the source snapshot or bookmark.
1114 * All the bookmarks and snapshots must be in the same pool.
1115 * The new bookmarks names must be unique.
1116 * => see function dsl_bookmark_create_nvl_validate
1117 *
1118 * The returned results nvlist will have an entry for each bookmark that failed.
1119 * The value will be the (int32) error code.
1120 *
1121 * The return value will be 0 if all bookmarks were created, otherwise it will
1122 * be the errno of a (undetermined) bookmarks that failed.
1123 */
1124int
1125lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
1126{
1127	nvpair_t *elem;
1128	int error;
1129	char pool[ZFS_MAX_DATASET_NAME_LEN];
1130
1131	/* determine pool name from first bookmark */
1132	elem = nvlist_next_nvpair(bookmarks, NULL);
1133	if (elem == NULL)
1134		return (0);
1135	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1136	pool[strcspn(pool, "/#")] = '\0';
1137
1138	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
1139
1140	return (error);
1141}
1142
1143/*
1144 * Retrieve bookmarks.
1145 *
1146 * Retrieve the list of bookmarks for the given file system. The props
1147 * parameter is an nvlist of property names (with no values) that will be
1148 * returned for each bookmark.
1149 *
1150 * The following are valid properties on bookmarks, most of which are numbers
1151 * (represented as uint64 in the nvlist), except redact_snaps, which is a
1152 * uint64 array, and redact_complete, which is a boolean
1153 *
1154 * "guid" - globally unique identifier of the snapshot it refers to
1155 * "createtxg" - txg when the snapshot it refers to was created
1156 * "creation" - timestamp when the snapshot it refers to was created
1157 * "ivsetguid" - IVset guid for identifying encrypted snapshots
1158 * "redact_snaps" - list of guids of the redaction snapshots for the specified
1159 *     bookmark.  If the bookmark is not a redaction bookmark, the nvlist will
1160 *     not contain an entry for this value.  If it is redacted with respect to
1161 *     no snapshots, it will contain value -> NULL uint64 array
1162 * "redact_complete" - boolean value; true if the redaction bookmark is
1163 *     complete, false otherwise.
1164 *
1165 * The format of the returned nvlist as follows:
1166 * <short name of bookmark> -> {
1167 *     <name of property> -> {
1168 *         "value" -> uint64
1169 *     }
1170 *     ...
1171 *     "redact_snaps" -> {
1172 *         "value" -> uint64 array
1173 *     }
1174 *     "redact_complete" -> {
1175 *         "value" -> boolean value
1176 *     }
1177 *  }
1178 */
1179int
1180lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
1181{
1182	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
1183}
1184
1185/*
1186 * Get bookmark properties.
1187 *
1188 * Given a bookmark's full name, retrieve all properties for the bookmark.
1189 *
1190 * The format of the returned property list is as follows:
1191 * {
1192 *     <name of property> -> {
1193 *         "value" -> uint64
1194 *     }
1195 *     ...
 *     "redact_snaps" -> {
 *         "value" -> uint64 array
 *     }
 * }
1199 */
1200int
1201lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
1202{
1203	int error;
1204
1205	nvlist_t *innvl = fnvlist_alloc();
1206	error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
1207	fnvlist_free(innvl);
1208
1209	return (error);
1210}
1211
1212/*
1213 * Destroys bookmarks.
1214 *
1215 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1216 * They must all be in the same pool.  Bookmarks are specified as
1217 * <fs>#<bmark>.
1218 *
1219 * Bookmarks that do not exist will be silently ignored.
1220 *
1221 * The return value will be 0 if all bookmarks that existed were destroyed.
1222 *
1223 * Otherwise the return value will be the errno of a (undetermined) bookmark
1224 * that failed, no bookmarks will be destroyed, and the errlist will have an
1225 * entry for each bookmarks that failed.  The value in the errlist will be
1226 * the (int32) error code.
1227 */
1228int
1229lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1230{
1231	nvpair_t *elem;
1232	int error;
1233	char pool[ZFS_MAX_DATASET_NAME_LEN];
1234
1235	/* determine the pool name */
1236	elem = nvlist_next_nvpair(bmarks, NULL);
1237	if (elem == NULL)
1238		return (0);
1239	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1240	pool[strcspn(pool, "/#")] = '\0';
1241
1242	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1243
1244	return (error);
1245}
1246
1247static int
1248lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
1249    uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1250{
1251	int error;
1252	nvlist_t *args;
1253
1254	args = fnvlist_alloc();
1255	fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
1256	fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
1257	fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
1258	fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
1259	fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
1260	error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
1261	fnvlist_free(args);
1262
1263	return (error);
1264}
1265
1266/*
1267 * Executes a channel program.
1268 *
1269 * If this function returns 0 the channel program was successfully loaded and
1270 * ran without failing. Note that individual commands the channel program ran
1271 * may have failed and the channel program is responsible for reporting such
1272 * errors through outnvl if they are important.
1273 *
1274 * This method may also return:
1275 *
1276 * EINVAL   The program contains syntax errors, or an invalid memory or time
1277 *          limit was given. No part of the channel program was executed.
1278 *          If caused by syntax errors, 'outnvl' contains information about the
1279 *          errors.
1280 *
1281 * ECHRNG   The program was executed, but encountered a runtime error, such as
1282 *          calling a function with incorrect arguments, invoking the error()
1283 *          function directly, failing an assert() command, etc. Some portion
1284 *          of the channel program may have executed and committed changes.
1285 *          Information about the failure can be found in 'outnvl'.
1286 *
1287 * ENOMEM   The program fully executed, but the output buffer was not large
1288 *          enough to store the returned value. No output is returned through
1289 *          'outnvl'.
1290 *
1291 * ENOSPC   The program was terminated because it exceeded its memory usage
1292 *          limit. Some portion of the channel program may have executed and
1293 *          committed changes to disk. No output is returned through 'outnvl'.
1294 *
1295 * ETIME    The program was terminated because it exceeded its Lua instruction
1296 *          limit. Some portion of the channel program may have executed and
1297 *          committed changes to disk. No output is returned through 'outnvl'.
1298 */
1299int
1300lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1301    uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1302{
1303	return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1304	    memlimit, argnvl, outnvl));
1305}
1306
1307/*
1308 * Creates a checkpoint for the specified pool.
1309 *
1310 * If this function returns 0 the pool was successfully checkpointed.
1311 *
1312 * This method may also return:
1313 *
 * ZFS_ERR_CHECKPOINT_EXISTS
 *	The pool already has a checkpoint. A pool can only have one
 *	checkpoint at most, at any given time.
 *
 * ZFS_ERR_DISCARDING_CHECKPOINT
 *	ZFS is in the middle of discarding a checkpoint for this pool.
 *	The pool can be checkpointed again once the discard is done.
 *
 * ZFS_ERR_DEVRM_IN_PROGRESS
 *	A vdev is currently being removed. The pool cannot be
 *	checkpointed until the device removal is done.
 *
 * ZFS_ERR_VDEV_TOO_BIG
 *	One or more top-level vdevs exceed the maximum vdev size
 *	supported for this feature.
1329 */
1330int
1331lzc_pool_checkpoint(const char *pool)
1332{
1333	int error;
1334
1335	nvlist_t *result = NULL;
1336	nvlist_t *args = fnvlist_alloc();
1337
1338	error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1339
1340	fnvlist_free(args);
1341	fnvlist_free(result);
1342
1343	return (error);
1344}
1345
1346/*
1347 * Discard the checkpoint from the specified pool.
1348 *
1349 * If this function returns 0 the checkpoint was successfully discarded.
1350 *
1351 * This method may also return:
1352 *
1353 * ZFS_ERR_NO_CHECKPOINT
1354 * 	The pool does not have a checkpoint.
1355 *
1356 * ZFS_ERR_DISCARDING_CHECKPOINT
1357 * 	ZFS is already in the middle of discarding the checkpoint.
1358 */
1359int
1360lzc_pool_checkpoint_discard(const char *pool)
1361{
1362	int error;
1363
1364	nvlist_t *result = NULL;
1365	nvlist_t *args = fnvlist_alloc();
1366
1367	error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1368
1369	fnvlist_free(args);
1370	fnvlist_free(result);
1371
1372	return (error);
1373}
1374
1375/*
1376 * Executes a read-only channel program.
1377 *
1378 * A read-only channel program works programmatically the same way as a
1379 * normal channel program executed with lzc_channel_program(). The only
1380 * difference is it runs exclusively in open-context and therefore can
1381 * return faster. The downside to that, is that the program cannot change
1382 * on-disk state by calling functions from the zfs.sync submodule.
1383 *
1384 * The return values of this function (and their meaning) are exactly the
1385 * same as the ones described in lzc_channel_program().
1386 */
1387int
1388lzc_channel_program_nosync(const char *pool, const char *program,
1389    uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1390{
1391	return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1392	    memlimit, argnvl, outnvl));
1393}
1394
1395/*
1396 * Performs key management functions
1397 *
1398 * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to
1399 * load or change a wrapping key, the key should be specified in the
1400 * hidden_args nvlist so that it is not logged.
1401 */
1402int
1403lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
1404    uint_t wkeylen)
1405{
1406	int error;
1407	nvlist_t *ioc_args;
1408	nvlist_t *hidden_args;
1409
1410	if (wkeydata == NULL)
1411		return (EINVAL);
1412
1413	ioc_args = fnvlist_alloc();
1414	hidden_args = fnvlist_alloc();
1415	fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
1416	fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1417	if (noop)
1418		fnvlist_add_boolean(ioc_args, "noop");
1419	error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
1420	nvlist_free(hidden_args);
1421	nvlist_free(ioc_args);
1422
1423	return (error);
1424}
1425
1426int
1427lzc_unload_key(const char *fsname)
1428{
1429	return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
1430}
1431
1432int
1433lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
1434    uint8_t *wkeydata, uint_t wkeylen)
1435{
1436	int error;
1437	nvlist_t *ioc_args = fnvlist_alloc();
1438	nvlist_t *hidden_args = NULL;
1439
1440	fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
1441
1442	if (wkeydata != NULL) {
1443		hidden_args = fnvlist_alloc();
1444		fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
1445		    wkeylen);
1446		fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1447	}
1448
1449	if (props != NULL)
1450		fnvlist_add_nvlist(ioc_args, "props", props);
1451
1452	error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
1453	nvlist_free(hidden_args);
1454	nvlist_free(ioc_args);
1455
1456	return (error);
1457}
1458
1459int
1460lzc_reopen(const char *pool_name, boolean_t scrub_restart)
1461{
1462	nvlist_t *args = fnvlist_alloc();
1463	int error;
1464
1465	fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
1466
1467	error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
1468	nvlist_free(args);
1469	return (error);
1470}
1471
1472/*
1473 * Changes initializing state.
1474 *
1475 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1476 * The key is ignored.
1477 *
1478 * If there are errors related to vdev arguments, per-vdev errors are returned
1479 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1480 * guid is stringified with PRIu64, and errno is one of the following as
1481 * an int64_t:
1482 *	- ENODEV if the device was not found
 *	- EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1484 *	- EROFS if the device is not writeable
1485 *	- EBUSY start requested but the device is already being either
1486 *	        initialized or trimmed
1487 *	- ESRCH cancel/suspend requested but device is not being initialized
1488 *
1489 * If the errlist is empty, then return value will be:
1490 *	- EINVAL if one or more arguments was invalid
1491 *	- Other spa_open failures
1492 *	- 0 if the operation succeeded
1493 */
1494int
1495lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1496    nvlist_t *vdevs, nvlist_t **errlist)
1497{
1498	int error;
1499
1500	nvlist_t *args = fnvlist_alloc();
1501	fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1502	fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1503
1504	error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1505
1506	fnvlist_free(args);
1507
1508	return (error);
1509}
1510
1511/*
1512 * Changes TRIM state.
1513 *
1514 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1515 * The key is ignored.
1516 *
1517 * If there are errors related to vdev arguments, per-vdev errors are returned
1518 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1519 * guid is stringified with PRIu64, and errno is one of the following as
1520 * an int64_t:
1521 *	- ENODEV if the device was not found
 *	- EINVAL if the device is not a leaf or is not concrete (e.g. missing)
 *	- EROFS if the device is not writeable
 *	- EBUSY start requested but the device is already being either trimmed
 *	        or initialized
 *	- ESRCH cancel/suspend requested but device is not being trimmed
1527 *	- EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
1528 *
1529 * If the errlist is empty, then return value will be:
1530 *	- EINVAL if one or more arguments was invalid
1531 *	- Other spa_open failures
1532 *	- 0 if the operation succeeded
1533 */
1534int
1535lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
1536    boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
1537{
1538	int error;
1539
1540	nvlist_t *args = fnvlist_alloc();
1541	fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
1542	fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
1543	fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
1544	fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
1545
1546	error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
1547
1548	fnvlist_free(args);
1549
1550	return (error);
1551}
1552
1553/*
1554 * Create a redaction bookmark named bookname by redacting snapshot with respect
1555 * to all the snapshots in snapnv.
1556 */
1557int
1558lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
1559{
1560	nvlist_t *args = fnvlist_alloc();
1561	fnvlist_add_string(args, "bookname", bookname);
1562	fnvlist_add_nvlist(args, "snapnv", snapnv);
1563	int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
1564	fnvlist_free(args);
1565	return (error);
1566}
1567
1568static int
1569wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
1570    uint64_t tag, boolean_t *waited)
1571{
1572	nvlist_t *args = fnvlist_alloc();
1573	nvlist_t *result = NULL;
1574
1575	fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity);
1576	if (use_tag)
1577		fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag);
1578
1579	int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result);
1580
1581	if (error == 0 && waited != NULL)
1582		*waited = fnvlist_lookup_boolean_value(result,
1583		    ZPOOL_WAIT_WAITED);
1584
1585	fnvlist_free(args);
1586	fnvlist_free(result);
1587
1588	return (error);
1589}
1590
1591int
1592lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
1593{
1594	return (wait_common(pool, activity, B_FALSE, 0, waited));
1595}
1596
1597int
1598lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
1599    boolean_t *waited)
1600{
1601	return (wait_common(pool, activity, B_TRUE, tag, waited));
1602}
1603
1604int
1605lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
1606{
1607	nvlist_t *args = fnvlist_alloc();
1608	nvlist_t *result = NULL;
1609
1610	fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);
1611
1612	int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);
1613
1614	if (error == 0 && waited != NULL)
1615		*waited = fnvlist_lookup_boolean_value(result,
1616		    ZFS_WAIT_WAITED);
1617
1618	fnvlist_free(args);
1619	fnvlist_free(result);
1620
1621	return (error);
1622}
1623
1624/*
1625 * Set the bootenv contents for the given pool.
1626 */
1627int
1628lzc_set_bootenv(const char *pool, const nvlist_t *env)
1629{
1630	return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
1631}
1632
1633/*
1634 * Get the contents of the bootenv of the given pool.
1635 */
1636int
1637lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
1638{
1639	return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
1640}
1641