libzfs_core.c revision 325534
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2017 RackTop Systems.
 */

/*
 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
 * It has the following characteristics:
 *
 *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
 *  threads.  This is accomplished primarily by avoiding global data
 *  (e.g. caching).  Since it's thread-safe, there is no reason for a
 *  process to have multiple libzfs "instances".  Therefore, we store
 *  our few pieces of data (e.g. the file descriptor) in global
 *  variables.  The fd is reference-counted so that the libzfs_core
 *  library can be "initialized" multiple times (e.g. by different
 *  consumers within the same process).
 *
 *  - Committed Interface.  The libzfs_core interface will be committed,
 *  therefore consumers can compile against it and be confident that
 *  their code will continue to work on future releases of this code.
 *  Currently, the interface is Evolving (not Committed), but we intend
 *  to commit to it once it is more complete and we determine that it
 *  meets the needs of all consumers.
 *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
 *  defined error numbers, and doesn't print anything to stdout/stderr.
 *
 *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
 *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
 *  between libzfs_core functions and ioctls to /dev/zfs.
 *
 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
 *  libzfs_core function is atomic.  For example, creating multiple
 *  snapshots with a single call to lzc_snapshot() is atomic -- it
 *  can't fail with only some of the requested snapshots created, even
 *  in the event of power loss or system crash.
 *
 *  - Continued libzfs Support.  Some higher-level operations (e.g.
 *  support for "zfs send -R") are too complicated to fit the scope of
 *  libzfs_core.  This functionality will continue to live in libzfs.
 *  Where appropriate, libzfs will use the underlying atomic operations
 *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
 *  zfs receive" by using individual "send one snapshot", rename,
 *  destroy, and "receive one snapshot" operations in libzfs_core.
 *  /sbin/zfs and /sbin/zpool will link with both libzfs and
 *  libzfs_core.  Other consumers should aim to use only libzfs_core,
 *  since that will be the supported, stable interface going forwards.
 */

#define _IN_LIBZFS_CORE_

#include <libzfs_core.h>
#include <ctype.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/nvpair.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>
#include "libzfs_core_compat.h"
#include "libzfs_compat.h"

#ifdef __FreeBSD__
extern int zfs_ioctl_version;
#endif

static int g_fd;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;

int
libzfs_core_init(void)
{
	(void) pthread_mutex_lock(&g_lock);
	if (g_refcount == 0) {
		g_fd = open("/dev/zfs", O_RDWR);
		if (g_fd < 0) {
			(void) pthread_mutex_unlock(&g_lock);
			return (errno);
		}
	}
	g_refcount++;
	(void) pthread_mutex_unlock(&g_lock);

	return (0);
}

void
libzfs_core_fini(void)
{
	(void) pthread_mutex_lock(&g_lock);
	ASSERT3S(g_refcount, >, 0);
	g_refcount--;
	if (g_refcount == 0)
		(void) close(g_fd);
	(void) pthread_mutex_unlock(&g_lock);
}
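
/*
 * Usage sketch (illustrative only, not part of the library interface): a
 * consumer takes a reference on the library before issuing lzc_* calls and
 * drops it when finished.  Because the /dev/zfs fd is reference-counted,
 * independent components in one process may each call these functions.
 *
 *	int err = libzfs_core_init();
 *	if (err != 0)
 *		return (err);
 *	... issue lzc_snapshot(), lzc_send(), etc. ...
 *	libzfs_core_fini();
 */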

static int
lzc_ioctl(zfs_ioc_t ioc, const char *name,
    nvlist_t *source, nvlist_t **resultp)
{
	zfs_cmd_t zc = { 0 };
	int error = 0;
	char *packed;
#ifdef __FreeBSD__
	nvlist_t *oldsource;
#endif
	size_t size;

	ASSERT3S(g_refcount, >, 0);

	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));

#ifdef __FreeBSD__
	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
		zfs_ioctl_version = get_zfs_ioctl_version();

	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
		oldsource = source;
		error = lzc_compat_pre(&zc, &ioc, &source);
		if (error)
			return (error);
	}
#endif

	packed = fnvlist_pack(source, &size);
	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
	zc.zc_nvlist_src_size = size;

	if (resultp != NULL) {
		*resultp = NULL;
		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
		    malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
		if (zc.zc_nvlist_dst == NULL) {
#else
		if (zc.zc_nvlist_dst == 0) {
#endif
			error = ENOMEM;
			goto out;
		}
	}

	while (ioctl(g_fd, ioc, &zc) != 0) {
		/*
		 * If the ioctl failed with ENOMEM, we retry it after
		 * increasing the size of the destination nvlist.
		 *
		 * Channel programs that exit with ENOMEM probably ran over the
		 * Lua memory sandbox; they should not be retried.
		 */
		if (errno == ENOMEM && resultp != NULL &&
		    ioc != ZFS_IOC_CHANNEL_PROGRAM) {
			free((void *)(uintptr_t)zc.zc_nvlist_dst);
			zc.zc_nvlist_dst_size *= 2;
			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
			    malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
			if (zc.zc_nvlist_dst == NULL) {
#else
			if (zc.zc_nvlist_dst == 0) {
#endif
				error = ENOMEM;
				goto out;
			}
		} else {
			error = errno;
			break;
		}
	}

#ifdef __FreeBSD__
	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
		lzc_compat_post(&zc, ioc);
#endif
	if (zc.zc_nvlist_dst_filled) {
		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
		    zc.zc_nvlist_dst_size);
	}
#ifdef __FreeBSD__
	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
		lzc_compat_outnvl(&zc, ioc, resultp);
#endif
out:
#ifdef __FreeBSD__
	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
		if (source != oldsource)
			nvlist_free(source);
		source = oldsource;
	}
#endif
	fnvlist_pack_free(packed, size);
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (error);
}

int
lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
{
	int error;
	nvlist_t *args = fnvlist_alloc();
	fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);
	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
	nvlist_free(args);
	return (error);
}

int
lzc_clone(const char *fsname, const char *origin,
    nvlist_t *props)
{
	int error;
	nvlist_t *args = fnvlist_alloc();
	fnvlist_add_string(args, "origin", origin);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);
	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
	nvlist_free(args);
	return (error);
}

int
lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
{
	/*
	 * The promote ioctl is still legacy, so we need to construct our
	 * own zfs_cmd_t rather than using lzc_ioctl().
	 */
	zfs_cmd_t zc = { 0 };

	ASSERT3S(g_refcount, >, 0);
	VERIFY3S(g_fd, !=, -1);

	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
	if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
		int error = errno;
		if (error == EEXIST && snapnamebuf != NULL)
			(void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
		return (error);
	}
	return (0);
}

/*
 * Creates snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be created.
 * They must all be in the same pool.
 *
 * The props nvlist is properties to set.  Currently only user properties
 * are supported.  { user:prop_name -> string value }
 *
 * The returned results nvlist will have an entry for each snapshot that failed.
 * The value will be the (int32) error code.
 *
 * The return value will be 0 if all snapshots were created, otherwise it will
 * be the errno of an (unspecified) snapshot that failed.
 */
int
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
{
	nvpair_t *elem;
	nvlist_t *args;
	int error;
	char pool[ZFS_MAX_DATASET_NAME_LEN];

	*errlist = NULL;

	/* determine the pool name */
	elem = nvlist_next_nvpair(snaps, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "snaps", snaps);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);

	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
	nvlist_free(args);

	return (error);
}
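
/*
 * Usage sketch (illustrative only; the pool and dataset names are
 * hypothetical): atomically create two snapshots in the same pool and
 * inspect the per-snapshot error list on failure.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	int err;
 *
 *	fnvlist_add_boolean(snaps, "tank/fs1@backup");
 *	fnvlist_add_boolean(snaps, "tank/fs2@backup");
 *	err = lzc_snapshot(snaps, NULL, &errlist);
 *	if (err != 0 && errlist != NULL) {
 *		for (nvpair_t *p = nvlist_next_nvpair(errlist, NULL);
 *		    p != NULL; p = nvlist_next_nvpair(errlist, p))
 *			... nvpair_name(p) failed with fnvpair_value_int32(p) ...
 *	}
 *	nvlist_free(errlist);
 *	nvlist_free(snaps);
 */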

/*
 * Destroys snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be destroyed.
 * They must all be in the same pool.
 *
 * Snapshots that do not exist will be silently ignored.
 *
 * If 'defer' is not set, and a snapshot has user holds or clones, the
 * destroy operation will fail and none of the snapshots will be
 * destroyed.
 *
 * If 'defer' is set, and a snapshot has user holds or clones, it will be
 * marked for deferred destruction, and will be destroyed when the last hold
 * or clone is removed/destroyed.
 *
 * The return value will be 0 if all snapshots were destroyed (or marked for
 * later destruction if 'defer' is set) or didn't exist to begin with.
 *
 * Otherwise the return value will be the errno of an (unspecified) snapshot
 * that failed, no snapshots will be destroyed, and the errlist will have an
 * entry for each snapshot that failed.  The value in the errlist will be
 * the (int32) error code.
 */
int
lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
{
	nvpair_t *elem;
	nvlist_t *args;
	int error;
	char pool[ZFS_MAX_DATASET_NAME_LEN];

	/* determine the pool name */
	elem = nvlist_next_nvpair(snaps, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "snaps", snaps);
	if (defer)
		fnvlist_add_boolean(args, "defer");

	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
	nvlist_free(args);

	return (error);
}
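
/*
 * Usage sketch (illustrative only; names are hypothetical): destroy the
 * snapshots created above, deferring destruction of any that still have
 * holds or clones.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *
 *	fnvlist_add_boolean(snaps, "tank/fs1@backup");
 *	fnvlist_add_boolean(snaps, "tank/fs2@backup");
 *	(void) lzc_destroy_snaps(snaps, B_TRUE, &errlist);
 *	nvlist_free(errlist);
 *	nvlist_free(snaps);
 */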

int
lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
    uint64_t *usedp)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;
	char fs[ZFS_MAX_DATASET_NAME_LEN];
	char *atp;

	/* determine the fs name */
	(void) strlcpy(fs, firstsnap, sizeof (fs));
	atp = strchr(fs, '@');
	if (atp == NULL)
		return (EINVAL);
	*atp = '\0';

	args = fnvlist_alloc();
	fnvlist_add_string(args, "firstsnap", firstsnap);

	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
	nvlist_free(args);
	if (err == 0)
		*usedp = fnvlist_lookup_uint64(result, "used");
	fnvlist_free(result);

	return (err);
}

boolean_t
lzc_exists(const char *dataset)
{
	/*
	 * The objset_stats ioctl is still legacy, so we need to construct our
	 * own zfs_cmd_t rather than using lzc_ioctl().
	 */
	zfs_cmd_t zc = { 0 };

	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
}

/*
 * Create "user holds" on snapshots.  If there is a hold on a snapshot,
 * the snapshot can not be destroyed.  (However, it can be marked for deletion
 * by lzc_destroy_snaps(defer=B_TRUE).)
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is the name of the hold (string type).
 *
 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
 * In this case, when the cleanup_fd is closed (including on process
 * termination), the holds will be released.  If the system is shut down
 * uncleanly, the holds will be released when the pool is next opened
 * or imported.
 *
 * Holds for snapshots which don't exist will be skipped and have an entry
 * added to errlist, but will not cause an overall failure.
 *
 * The return value will be 0 if all holds, for snapshots that existed,
 * were successfully created.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed and no holds will be created.
 *
 * In all cases the errlist will have an entry for each hold that failed
 * (name = snapshot), with its value being the error code (int32).
 */
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
{
	char pool[ZFS_MAX_DATASET_NAME_LEN];
	nvlist_t *args;
	nvpair_t *elem;
	int error;

	/* determine the pool name */
	elem = nvlist_next_nvpair(holds, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "holds", holds);
	if (cleanup_fd != -1)
		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);

	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
	nvlist_free(args);
	return (error);
}
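
/*
 * Usage sketch (illustrative only; names are hypothetical): place a hold
 * tagged "backup-job" on a snapshot, tying its lifetime to a cleanup fd so
 * the hold is released if the process exits unexpectedly.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	int cleanup_fd = open("/dev/zfs", O_RDWR | O_EXCL);
 *	int err;
 *
 *	fnvlist_add_string(holds, "tank/fs1@backup", "backup-job");
 *	err = lzc_hold(holds, cleanup_fd, &errlist);
 *	nvlist_free(errlist);
 *	nvlist_free(holds);
 */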

/*
 * Release "user holds" on snapshots.  If the snapshot has been marked for
 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
 * any clones, and all the user holds are removed, then the snapshot will be
 * destroyed.
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is a nvlist whose keys are the holds to remove.
 *
 * Holds which failed to release because they didn't exist will have an entry
 * added to errlist, but will not cause an overall failure.
 *
 * The return value will be 0 if the 'holds' nvlist was empty or all holds
 * that existed were successfully removed.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed to release and no holds will be released.
 *
 * In all cases the errlist will have an entry for each hold that failed
 * to release.
 */
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)
{
	char pool[ZFS_MAX_DATASET_NAME_LEN];
	nvpair_t *elem;

	/* determine the pool name */
	elem = nvlist_next_nvpair(holds, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
}
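
/*
 * Usage sketch (illustrative only; names are hypothetical): release the
 * "backup-job" hold placed above.  Each snapshot maps to a nested nvlist
 * naming the holds to remove.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *tags = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *
 *	fnvlist_add_boolean(tags, "backup-job");
 *	fnvlist_add_nvlist(holds, "tank/fs1@backup", tags);
 *	(void) lzc_release(holds, &errlist);
 *	nvlist_free(errlist);
 *	nvlist_free(tags);
 *	nvlist_free(holds);
 */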

/*
 * Retrieve list of user holds on the specified snapshot.
 *
 * On success, *holdsp will be set to a nvlist which the caller must free.
 * The keys are the names of the holds, and the value is the creation time
 * of the hold (uint64) in seconds since the epoch.
 */
int
lzc_get_holds(const char *snapname, nvlist_t **holdsp)
{
	int error;
	nvlist_t *innvl = fnvlist_alloc();
	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
	fnvlist_free(innvl);
	return (error);
}
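
/*
 * Usage sketch (illustrative only; names are hypothetical): list the holds
 * on a snapshot and walk the returned nvlist of tag -> creation time.
 *
 *	nvlist_t *holds = NULL;
 *
 *	if (lzc_get_holds("tank/fs1@backup", &holds) == 0) {
 *		for (nvpair_t *p = nvlist_next_nvpair(holds, NULL);
 *		    p != NULL; p = nvlist_next_nvpair(holds, p))
 *			... nvpair_name(p) created at fnvpair_value_uint64(p) ...
 *		nvlist_free(holds);
 *	}
 */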

/*
 * Generate a zfs send stream for the specified snapshot and write it to
 * the specified file descriptor.
 *
 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
 *
 * If "from" is NULL, a full (non-incremental) stream will be sent.
 * If "from" is non-NULL, it must be the full name of a snapshot or
 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
 * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
 * bookmark must represent an earlier point in the history of "snapname".
 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
 * or it can be the origin of "snapname"'s filesystem, or an earlier
 * snapshot in the origin, etc.
 *
 * "fd" is the file descriptor to write the send stream to.
 *
 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
 * records with drr_blksz > 128K.
 *
 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
 * which the receiving system must support (as indicated by support
 * for the "embedded_data" feature).
 */
int
lzc_send(const char *snapname, const char *from, int fd,
    enum lzc_send_flags flags)
{
	return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
}
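
/*
 * Usage sketch (illustrative only; names and paths are hypothetical):
 * write an incremental stream from "@monday" to "@tuesday" into a file.
 *
 *	int fd = open("/var/tmp/fs.stream", O_WRONLY | O_CREAT | O_TRUNC, 0600);
 *	int err;
 *
 *	if (fd < 0)
 *		return (errno);
 *	err = lzc_send("tank/fs@tuesday", "tank/fs@monday", fd, 0);
 *	(void) close(fd);
 */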

int
lzc_send_resume(const char *snapname, const char *from, int fd,
    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
{
	nvlist_t *args;
	int err;

	args = fnvlist_alloc();
	fnvlist_add_int32(args, "fd", fd);
	if (from != NULL)
		fnvlist_add_string(args, "fromsnap", from);
	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
		fnvlist_add_boolean(args, "largeblockok");
	if (flags & LZC_SEND_FLAG_EMBED_DATA)
		fnvlist_add_boolean(args, "embedok");
	if (flags & LZC_SEND_FLAG_COMPRESS)
		fnvlist_add_boolean(args, "compressok");
	if (resumeobj != 0 || resumeoff != 0) {
		fnvlist_add_uint64(args, "resume_object", resumeobj);
		fnvlist_add_uint64(args, "resume_offset", resumeoff);
	}
	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
	nvlist_free(args);
	return (err);
}

/*
 * "from" can be NULL, a snapshot, or a bookmark.
 *
 * If from is NULL, a full (non-incremental) stream will be estimated.  This
 * is calculated very efficiently.
 *
 * If from is a snapshot, lzc_send_space uses the deadlists attached to
 * each snapshot to efficiently estimate the stream size.
 *
 * If from is a bookmark, the indirect blocks in the destination snapshot
 * are traversed, looking for blocks with a birth time since the creation TXG of
 * the snapshot this bookmark was created from.  This will result in
 * significantly more I/O and be less efficient than a send space estimation on
 * an equivalent snapshot.
 */
int
lzc_send_space(const char *snapname, const char *from,
    enum lzc_send_flags flags, uint64_t *spacep)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;

	args = fnvlist_alloc();
	if (from != NULL)
		fnvlist_add_string(args, "from", from);
	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
		fnvlist_add_boolean(args, "largeblockok");
	if (flags & LZC_SEND_FLAG_EMBED_DATA)
		fnvlist_add_boolean(args, "embedok");
	if (flags & LZC_SEND_FLAG_COMPRESS)
		fnvlist_add_boolean(args, "compressok");
	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
	nvlist_free(args);
	if (err == 0)
		*spacep = fnvlist_lookup_uint64(result, "space");
	nvlist_free(result);
	return (err);
}
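
/*
 * Usage sketch (illustrative only; names are hypothetical): estimate the
 * size of the same incremental stream before sending it.
 *
 *	uint64_t space = 0;
 *	int err;
 *
 *	err = lzc_send_space("tank/fs@tuesday", "tank/fs@monday", 0, &space);
 *	if (err == 0)
 *		... approximately "space" bytes will be generated ...
 */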

static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int rv;
	int len = ilen;

	do {
		rv = read(fd, cp, len);
		cp += rv;
		len -= rv;
	} while (rv > 0);

	if (rv < 0 || len != 0)
		return (EIO);

	return (0);
}

static int
recv_impl(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, boolean_t resumable, int fd,
    const dmu_replay_record_t *begin_record)
{
	/*
	 * The receive ioctl is still legacy, so we need to construct our own
	 * zfs_cmd_t rather than using lzc_ioctl().
	 */
	zfs_cmd_t zc = { 0 };
	char *atp;
	char *packed = NULL;
	size_t size;
	int error;

	ASSERT3S(g_refcount, >, 0);

	/* zc_name is name of containing filesystem */
	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
	atp = strchr(zc.zc_name, '@');
	if (atp == NULL)
		return (EINVAL);
	*atp = '\0';

	/* if the fs does not exist, try its parent. */
	if (!lzc_exists(zc.zc_name)) {
		char *slashp = strrchr(zc.zc_name, '/');
		if (slashp == NULL)
			return (ENOENT);
		*slashp = '\0';
	}

	/* zc_value is full name of the snapshot to create */
	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));

	if (props != NULL) {
		/* zc_nvlist_src is props to set */
		packed = fnvlist_pack(props, &size);
		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
		zc.zc_nvlist_src_size = size;
	}

	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
	if (origin != NULL)
		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));

	/* zc_begin_record is non-byteswapped BEGIN record */
	if (begin_record == NULL) {
		error = recv_read(fd, &zc.zc_begin_record,
		    sizeof (zc.zc_begin_record));
		if (error != 0)
			goto out;
	} else {
		zc.zc_begin_record = *begin_record;
	}

	/* zc_cookie is fd to read from */
	zc.zc_cookie = fd;

	/* zc_guid is force flag */
	zc.zc_guid = force;

	zc.zc_resumable = resumable;

	/* zc_cleanup_fd is unused */
	zc.zc_cleanup_fd = -1;

	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
	if (error != 0)
		error = errno;

out:
	if (packed != NULL)
		fnvlist_pack_free(packed, size);
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (error);
}

/*
 * The simplest receive case: receive from the specified fd, creating the
 * specified snapshot.  Apply the specified properties as "received" properties
 * (which can be overridden by locally-set properties).  If the stream is a
 * clone, its origin snapshot must be specified by 'origin'.  The 'force'
 * flag will cause the target filesystem to be rolled back or destroyed if
 * necessary to receive.
 *
 * Return 0 on success or an errno on failure.
 *
 * Note: this interface does not work on dedup'd streams
 * (those with DMU_BACKUP_FEATURE_DEDUP).
 */
int
lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, int fd)
{
	return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL));
}
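
/*
 * Usage sketch (illustrative only; names and paths are hypothetical):
 * receive a stream from a file into a new snapshot, rolling back the
 * target filesystem if necessary.
 *
 *	int fd = open("/var/tmp/fs.stream", O_RDONLY);
 *	int err;
 *
 *	if (fd < 0)
 *		return (errno);
 *	err = lzc_receive("tank/restored/fs@tuesday", NULL, NULL, B_TRUE, fd);
 *	(void) close(fd);
 */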

/*
 * Like lzc_receive, but if the receive fails due to premature stream
 * termination, the intermediate state will be preserved on disk.  In this
 * case, ECKSUM will be returned.  The receive may subsequently be resumed
 * with a resuming send stream generated by lzc_send_resume().
 */
int
lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, int fd)
{
	return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL));
}

/*
 * Like lzc_receive, but allows the caller to read the begin record and then to
 * pass it in.  That could be useful if the caller wants to derive, for example,
 * the snapname or the origin parameters based on the information contained in
 * the begin record.
 * The begin record must be in its original form as read from the stream,
 * in other words, it should not be byteswapped.
 *
 * The 'resumable' parameter allows the caller to obtain the same behavior as
 * with lzc_receive_resumable.
 */
int
lzc_receive_with_header(const char *snapname, nvlist_t *props,
    const char *origin, boolean_t force, boolean_t resumable, int fd,
    const dmu_replay_record_t *begin_record)
{
	if (begin_record == NULL)
		return (EINVAL);
	return (recv_impl(snapname, props, origin, force, resumable, fd,
	    begin_record));
}

/*
 * Roll back this filesystem or volume to its most recent snapshot.
 * If snapnamebuf is not NULL, it will be filled in with the name
 * of the most recent snapshot.
 * Note that the latest snapshot may change if a new one is concurrently
 * created or the current one is destroyed.  lzc_rollback_to can be used
 * to roll back to a specific latest snapshot.
 *
 * Return 0 on success or an errno on failure.
 */
int
lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;

	args = fnvlist_alloc();
	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
	nvlist_free(args);
	if (err == 0 && snapnamebuf != NULL) {
		const char *snapname = fnvlist_lookup_string(result, "target");
		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
	}
	nvlist_free(result);

	return (err);
}
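
/*
 * Usage sketch (illustrative only; names are hypothetical): roll back to
 * the latest snapshot and report which snapshot that turned out to be.
 *
 *	char snapname[ZFS_MAX_DATASET_NAME_LEN];
 *	int err;
 *
 *	err = lzc_rollback("tank/fs", snapname, sizeof (snapname));
 *	if (err == 0)
 *		... the filesystem is now at "snapname" ...
 */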

/*
 * Roll back this filesystem or volume to the specified snapshot,
 * if possible.
 *
 * Return 0 on success or an errno on failure.
 */
int
lzc_rollback_to(const char *fsname, const char *snapname)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;

	args = fnvlist_alloc();
	fnvlist_add_string(args, "target", snapname);
	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
	nvlist_free(args);
	nvlist_free(result);
	return (err);
}

/*
 * Creates bookmarks.
 *
 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
 * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
 * snapshots must be in the same pool.
 *
 * The returned results nvlist will have an entry for each bookmark that failed.
 * The value will be the (int32) error code.
 *
 * The return value will be 0 if all bookmarks were created, otherwise it will
 * be the errno of an (undetermined) bookmark that failed.
 */
int
lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
{
	nvpair_t *elem;
	int error;
	char pool[ZFS_MAX_DATASET_NAME_LEN];

	/* determine the pool name */
	elem = nvlist_next_nvpair(bookmarks, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/#")] = '\0';

	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);

	return (error);
}
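
/*
 * Usage sketch (illustrative only; names are hypothetical): bookmark a
 * snapshot so it can later serve as an incremental source even after the
 * snapshot itself is destroyed.
 *
 *	nvlist_t *bmarks = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *
 *	fnvlist_add_string(bmarks, "tank/fs#monday", "tank/fs@monday");
 *	(void) lzc_bookmark(bmarks, &errlist);
 *	nvlist_free(errlist);
 *	nvlist_free(bmarks);
 */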

/*
 * Retrieve bookmarks.
 *
 * Retrieve the list of bookmarks for the given file system. The props
 * parameter is an nvlist of property names (with no values) that will be
 * returned for each bookmark.
 *
 * The following are valid properties on bookmarks, all of which are numbers
 * (represented as uint64 in the nvlist)
 *
 * "guid" - globally unique identifier of the snapshot it refers to
 * "createtxg" - txg when the snapshot it refers to was created
 * "creation" - timestamp when the snapshot it refers to was created
 *
 * The format of the returned nvlist is as follows:
 * <short name of bookmark> -> {
 *     <name of property> -> {
 *         "value" -> uint64
 *     }
 * }
 */
int
lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
{
	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
}

/*
 * Destroys bookmarks.
 *
 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
 * They must all be in the same pool.  Bookmarks are specified as
 * <fs>#<bmark>.
 *
 * Bookmarks that do not exist will be silently ignored.
 *
 * The return value will be 0 if all bookmarks that existed were destroyed.
 *
 * Otherwise the return value will be the errno of an (undetermined) bookmark
 * that failed, no bookmarks will be destroyed, and the errlist will have an
 * entry for each bookmark that failed.  The value in the errlist will be
 * the (int32) error code.
 */
int
lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
{
	nvpair_t *elem;
	int error;
	char pool[ZFS_MAX_DATASET_NAME_LEN];

	/* determine the pool name */
	elem = nvlist_next_nvpair(bmarks, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/#")] = '\0';

	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);

	return (error);
}

/*
 * Executes a channel program.
 *
 * If this function returns 0 the channel program was successfully loaded and
 * ran without failing. Note that individual commands the channel program ran
 * may have failed and the channel program is responsible for reporting such
 * errors through outnvl if they are important.
 *
 * This method may also return:
 *
 * EINVAL   The program contains syntax errors, or an invalid memory or time
 *          limit was given. No part of the channel program was executed.
 *          If caused by syntax errors, 'outnvl' contains information about the
 *          errors.
 *
 * EDOM     The program was executed, but encountered a runtime error, such as
 *          calling a function with incorrect arguments, invoking the error()
 *          function directly, failing an assert() command, etc. Some portion
 *          of the channel program may have executed and committed changes.
 *          Information about the failure can be found in 'outnvl'.
 *
 * ENOMEM   The program fully executed, but the output buffer was not large
 *          enough to store the returned value. No output is returned through
 *          'outnvl'.
 *
 * ENOSPC   The program was terminated because it exceeded its memory usage
 *          limit. Some portion of the channel program may have executed and
 *          committed changes to disk. No output is returned through 'outnvl'.
 *
 * ETIMEDOUT The program was terminated because it exceeded its Lua instruction
 *           limit. Some portion of the channel program may have executed and
 *           committed changes to disk. No output is returned through 'outnvl'.
 */
int
lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
    uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
{
	int error;
	nvlist_t *args;

	args = fnvlist_alloc();
	fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
	fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
	fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
	fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
	error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
	fnvlist_free(args);

	return (error);
}

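/*
 * Usage sketch (illustrative only; the pool name, limits, and program text
 * are hypothetical): run a trivial program that simply returns a value,
 * using limits comparable to the "zfs program" defaults.
 *
 *	nvlist_t *argv = fnvlist_alloc();
 *	nvlist_t *outnvl = NULL;
 *	int err;
 *
 *	err = lzc_channel_program("tank", "return 10",
 *	    10 * 1000 * 1000, 10 * 1024 * 1024, argv, &outnvl);
 *	if (err == 0)
 *		... the program's result is returned through outnvl ...
 *	nvlist_free(outnvl);
 *	nvlist_free(argv);
 */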