libzfs_core.c revision 288568
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 */

/*
 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
 * It has the following characteristics:
 *
 *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
 *  threads.  This is accomplished primarily by avoiding global data
 *  (e.g. caching).  Since it's thread-safe, there is no reason for a
 *  process to have multiple libzfs "instances".  Therefore, we store
 *  our few pieces of data (e.g. the file descriptor) in global
 *  variables.  The fd is reference-counted so that the libzfs_core
 *  library can be "initialized" multiple times (e.g. by different
 *  consumers within the same process).
 *
 *  - Committed Interface.  The libzfs_core interface will be committed,
 *  therefore consumers can compile against it and be confident that
 *  their code will continue to work on future releases of this code.
 *  Currently, the interface is Evolving (not Committed), but we intend
 *  to commit to it once it is more complete and we determine that it
 *  meets the needs of all consumers.
 *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
 *  defined error numbers, and doesn't print anything to stdout/stderr.
 *
 *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
 *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
 *  between libzfs_core functions and ioctls to /dev/zfs.
 *
 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
 *  libzfs_core function is atomic.  For example, creating multiple
 *  snapshots with a single call to lzc_snapshot() is atomic -- it
 *  can't fail with only some of the requested snapshots created, even
 *  in the event of power loss or system crash.
 *
 *  - Continued libzfs Support.  Some higher-level operations (e.g.
 *  support for "zfs send -R") are too complicated to fit the scope of
 *  libzfs_core.  This functionality will continue to live in libzfs.
 *  Where appropriate, libzfs will use the underlying atomic operations
 *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
 *  zfs receive" by using individual "send one snapshot", rename,
 *  destroy, and "receive one snapshot" operations in libzfs_core.
 *  /sbin/zfs and /sbin/zpool will link with both libzfs and
 *  libzfs_core.  Other consumers should aim to use only libzfs_core,
 *  since that will be the supported, stable interface going forward.
 */

#define _IN_LIBZFS_CORE_

#include <libzfs_core.h>
#include <ctype.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/nvpair.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>
#include "libzfs_core_compat.h"
#include "libzfs_compat.h"

#ifdef __FreeBSD__
extern int zfs_ioctl_version;
#endif

static int g_fd;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;

int
libzfs_core_init(void)
{
	(void) pthread_mutex_lock(&g_lock);
	if (g_refcount == 0) {
		g_fd = open("/dev/zfs", O_RDWR);
		if (g_fd < 0) {
			(void) pthread_mutex_unlock(&g_lock);
			return (errno);
		}
	}
	g_refcount++;
	(void) pthread_mutex_unlock(&g_lock);

	return (0);
}

void
libzfs_core_fini(void)
{
	(void) pthread_mutex_lock(&g_lock);
	ASSERT3S(g_refcount, >, 0);
	g_refcount--;
	if (g_refcount == 0)
		(void) close(g_fd);
	(void) pthread_mutex_unlock(&g_lock);
}
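
/*
 * Illustrative sketch (editor's example, not part of the library): consumers
 * pair libzfs_core_init() with libzfs_core_fini(); the reference-counted fd
 * lets independent consumers in one process nest init/fini safely.
 *
 *	int err = libzfs_core_init();
 *	if (err != 0)
 *		return (err);	// errno from open("/dev/zfs")
 *	// ... call lzc_* functions ...
 *	libzfs_core_fini();
 */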

static int
lzc_ioctl(zfs_ioc_t ioc, const char *name,
    nvlist_t *source, nvlist_t **resultp)
{
	zfs_cmd_t zc = { 0 };
	int error = 0;
	char *packed;
#ifdef __FreeBSD__
	nvlist_t *oldsource;
#endif
	size_t size;

	ASSERT3S(g_refcount, >, 0);

	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));

#ifdef __FreeBSD__
	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
		zfs_ioctl_version = get_zfs_ioctl_version();

	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
		oldsource = source;
		error = lzc_compat_pre(&zc, &ioc, &source);
		if (error)
			return (error);
	}
#endif

	packed = fnvlist_pack(source, &size);
	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
	zc.zc_nvlist_src_size = size;

	if (resultp != NULL) {
		*resultp = NULL;
		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
		    malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
		if (zc.zc_nvlist_dst == NULL) {
#else
		if (zc.zc_nvlist_dst == 0) {
#endif
			error = ENOMEM;
			goto out;
		}
	}

	while (ioctl(g_fd, ioc, &zc) != 0) {
		if (errno == ENOMEM && resultp != NULL) {
			free((void *)(uintptr_t)zc.zc_nvlist_dst);
			zc.zc_nvlist_dst_size *= 2;
			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
			    malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
			if (zc.zc_nvlist_dst == NULL) {
#else
			if (zc.zc_nvlist_dst == 0) {
#endif
				error = ENOMEM;
				goto out;
			}
		} else {
			error = errno;
			break;
		}
	}

#ifdef __FreeBSD__
	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
		lzc_compat_post(&zc, ioc);
#endif
	if (zc.zc_nvlist_dst_filled) {
		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
		    zc.zc_nvlist_dst_size);
	}
#ifdef __FreeBSD__
	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
		lzc_compat_outnvl(&zc, ioc, resultp);
#endif
out:
#ifdef __FreeBSD__
	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
		if (source != oldsource)
			nvlist_free(source);
		source = oldsource;
	}
#endif
	fnvlist_pack_free(packed, size);
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (error);
}

int
lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
{
	int error;
	nvlist_t *args = fnvlist_alloc();
	fnvlist_add_int32(args, "type", type);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);
	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
	nvlist_free(args);
	return (error);
}

int
lzc_clone(const char *fsname, const char *origin,
    nvlist_t *props)
{
	int error;
	nvlist_t *args = fnvlist_alloc();
	fnvlist_add_string(args, "origin", origin);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);
	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
	nvlist_free(args);
	return (error);
}
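
/*
 * Illustrative sketch (editor's example; "tank/fs", "tank/clone", and
 * "tank/fs@snap" are hypothetical names): creating a new filesystem and
 * cloning an existing snapshot, with no properties set.
 *
 *	int error = lzc_create("tank/fs", DMU_OST_ZFS, NULL);
 *	error = lzc_clone("tank/clone", "tank/fs@snap", NULL);
 */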

/*
 * Creates snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be created.
 * They must all be in the same pool.
 *
 * The props nvlist is properties to set.  Currently only user properties
 * are supported.  { user:prop_name -> string value }
 *
 * The returned results nvlist will have an entry for each snapshot that failed.
 * The value will be the (int32) error code.
 *
 * The return value will be 0 if all snapshots were created, otherwise it will
 * be the errno of an (unspecified) snapshot that failed.
 */
int
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
{
	nvpair_t *elem;
	nvlist_t *args;
	int error;
	char pool[MAXNAMELEN];

	*errlist = NULL;

	/* determine the pool name */
	elem = nvlist_next_nvpair(snaps, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "snaps", snaps);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);

	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
	nvlist_free(args);

	return (error);
}
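
/*
 * Illustrative sketch (editor's example; names hypothetical): taking two
 * snapshots atomically and reporting per-snapshot errors.  Only the keys of
 * the "snaps" nvlist matter, so boolean flags suffice as values.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist;
 *	fnvlist_add_boolean(snaps, "tank/fs@today");
 *	fnvlist_add_boolean(snaps, "tank/vol@today");
 *	if (lzc_snapshot(snaps, NULL, &errlist) != 0 && errlist != NULL) {
 *		for (nvpair_t *p = nvlist_next_nvpair(errlist, NULL);
 *		    p != NULL; p = nvlist_next_nvpair(errlist, p))
 *			(void) fprintf(stderr, "%s: error %d\n",
 *			    nvpair_name(p), fnvpair_value_int32(p));
 *		fnvlist_free(errlist);
 *	}
 *	fnvlist_free(snaps);
 */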

/*
 * Destroys snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be destroyed.
 * They must all be in the same pool.
 *
 * Snapshots that do not exist will be silently ignored.
 *
 * If 'defer' is not set, and a snapshot has user holds or clones, the
 * destroy operation will fail and none of the snapshots will be
 * destroyed.
 *
 * If 'defer' is set, and a snapshot has user holds or clones, it will be
 * marked for deferred destruction, and will be destroyed when the last hold
 * or clone is removed/destroyed.
 *
 * The return value will be 0 if all snapshots were destroyed (or marked for
 * later destruction if 'defer' is set) or didn't exist to begin with.
 *
 * Otherwise the return value will be the errno of an (unspecified) snapshot
 * that failed, no snapshots will be destroyed, and the errlist will have an
 * entry for each snapshot that failed.  The value in the errlist will be
 * the (int32) error code.
 */
int
lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
{
	nvpair_t *elem;
	nvlist_t *args;
	int error;
	char pool[MAXNAMELEN];

	/* determine the pool name */
	elem = nvlist_next_nvpair(snaps, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "snaps", snaps);
	if (defer)
		fnvlist_add_boolean(args, "defer");

	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
	nvlist_free(args);

	return (error);
}
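
/*
 * Illustrative sketch (editor's example; names hypothetical): destroying a
 * snapshot with defer set, so that holds or clones mark it for deferred
 * destruction instead of failing the call.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist;
 *	fnvlist_add_boolean(snaps, "tank/fs@today");
 *	int error = lzc_destroy_snaps(snaps, B_TRUE, &errlist);
 *	fnvlist_free(snaps);
 */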

int
lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
    uint64_t *usedp)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;
	char fs[MAXNAMELEN];
	char *atp;

	/* determine the fs name */
	(void) strlcpy(fs, firstsnap, sizeof (fs));
	atp = strchr(fs, '@');
	if (atp == NULL)
		return (EINVAL);
	*atp = '\0';

	args = fnvlist_alloc();
	fnvlist_add_string(args, "firstsnap", firstsnap);

	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
	nvlist_free(args);
	if (err == 0)
		*usedp = fnvlist_lookup_uint64(result, "used");
	fnvlist_free(result);

	return (err);
}
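
/*
 * Illustrative sketch (editor's example; names hypothetical): querying the
 * space used by the range of snapshots between two endpoints in the same
 * filesystem.
 *
 *	uint64_t used;
 *	int err = lzc_snaprange_space("tank/fs@monday", "tank/fs@friday",
 *	    &used);
 */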

boolean_t
lzc_exists(const char *dataset)
{
	/*
	 * The objset_stats ioctl is still legacy, so we need to construct our
	 * own zfs_cmd_t rather than using lzc_ioctl().
	 */
	zfs_cmd_t zc = { 0 };

	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
}

/*
 * Create "user holds" on snapshots.  If there is a hold on a snapshot,
 * the snapshot can not be destroyed.  (However, it can be marked for deletion
 * by lzc_destroy_snaps(defer=B_TRUE).)
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is the name of the hold (string type).
 *
 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
 * In this case, when the cleanup_fd is closed (including on process
 * termination), the holds will be released.  If the system is shut down
 * uncleanly, the holds will be released when the pool is next opened
 * or imported.
 *
 * Holds for snapshots which don't exist will be skipped and have an entry
 * added to errlist, but will not cause an overall failure.
 *
 * The return value will be 0 if all holds for snapshots that existed
 * were successfully created.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed and no holds will be created.
 *
 * In all cases the errlist will have an entry for each hold that failed
 * (name = snapshot), with its value being the error code (int32).
 */
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
{
	char pool[MAXNAMELEN];
	nvlist_t *args;
	nvpair_t *elem;
	int error;

	/* determine the pool name */
	elem = nvlist_next_nvpair(holds, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "holds", holds);
	if (cleanup_fd != -1)
		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);

	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
	nvlist_free(args);
	return (error);
}
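
/*
 * Illustrative sketch (editor's example; names hypothetical): placing a hold
 * that is automatically released when the cleanup fd is closed, per the
 * comment above.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *errlist;
 *	int cleanup_fd = open("/dev/zfs", O_EXCL);
 *	fnvlist_add_string(holds, "tank/fs@today", "backup-job");
 *	int error = lzc_hold(holds, cleanup_fd, &errlist);
 *	fnvlist_free(holds);
 */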

/*
 * Release "user holds" on snapshots.  If the snapshot has been marked for
 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
 * any clones, and all the user holds are removed, then the snapshot will be
 * destroyed.
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is an nvlist whose keys are the holds to remove.
 *
 * Holds which failed to release because they didn't exist will have an entry
 * added to errlist, but will not cause an overall failure.
 *
 * The return value will be 0 if the holds nvlist was empty or if all holds
 * that existed were successfully removed.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed to release and no holds will be released.
 *
 * In all cases the errlist will have an entry for each hold that failed
 * to release.
 */
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)
{
	char pool[MAXNAMELEN];
	nvpair_t *elem;

	/* determine the pool name */
	elem = nvlist_next_nvpair(holds, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
}
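
/*
 * Illustrative sketch (editor's example; names hypothetical): note the
 * two-level nvlist, since one snapshot may carry several holds.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *tags = fnvlist_alloc();
 *	nvlist_t *errlist;
 *	fnvlist_add_boolean(tags, "backup-job");
 *	fnvlist_add_nvlist(holds, "tank/fs@today", tags);
 *	int error = lzc_release(holds, &errlist);
 *	fnvlist_free(tags);
 *	fnvlist_free(holds);
 */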

/*
 * Retrieve list of user holds on the specified snapshot.
 *
 * On success, *holdsp will be set to an nvlist which the caller must free.
 * The keys are the names of the holds, and the value is the creation time
 * of the hold (uint64) in seconds since the epoch.
 */
int
lzc_get_holds(const char *snapname, nvlist_t **holdsp)
{
	int error;
	nvlist_t *innvl = fnvlist_alloc();
	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
	fnvlist_free(innvl);
	return (error);
}
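
/*
 * Illustrative sketch (editor's example; name hypothetical): listing the
 * holds on a snapshot together with their creation times.
 *
 *	nvlist_t *holds;
 *	if (lzc_get_holds("tank/fs@today", &holds) == 0) {
 *		for (nvpair_t *p = nvlist_next_nvpair(holds, NULL);
 *		    p != NULL; p = nvlist_next_nvpair(holds, p))
 *			(void) printf("%s\t%llu\n", nvpair_name(p),
 *			    (u_longlong_t)fnvpair_value_uint64(p));
 *		fnvlist_free(holds);
 *	}
 */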

/*
 * Generate a zfs send stream for the specified snapshot and write it to
 * the specified file descriptor.
 *
 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
 *
 * If "from" is NULL, a full (non-incremental) stream will be sent.
 * If "from" is non-NULL, it must be the full name of a snapshot or
 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
 * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
 * bookmark must represent an earlier point in the history of "snapname".
 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
 * or it can be the origin of "snapname"'s filesystem, or an earlier
 * snapshot in the origin, etc.
 *
 * "fd" is the file descriptor to write the send stream to.
 *
 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
 * records with drr_blksz > 128K.
 *
 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
 * which the receiving system must support (as indicated by support
 * for the "embedded_data" feature).
 */
int
lzc_send(const char *snapname, const char *from, int fd,
    enum lzc_send_flags flags)
{
	nvlist_t *args;
	int err;

	args = fnvlist_alloc();
	fnvlist_add_int32(args, "fd", fd);
	if (from != NULL)
		fnvlist_add_string(args, "fromsnap", from);
	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
		fnvlist_add_boolean(args, "largeblockok");
	if (flags & LZC_SEND_FLAG_EMBED_DATA)
		fnvlist_add_boolean(args, "embedok");
	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
	nvlist_free(args);
	return (err);
}
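
/*
 * Illustrative sketch (editor's example; snapshot names and the "/tmp/stream"
 * path are hypothetical): writing an incremental stream between two
 * snapshots to a file, with no optional flags.
 *
 *	int fd = open("/tmp/stream", O_WRONLY | O_CREAT, 0644);
 *	int err = lzc_send("tank/fs@friday", "tank/fs@monday", fd, 0);
 *	(void) close(fd);
 */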

/*
 * "from" can be NULL, a snapshot, or a bookmark.
 *
 * If from is NULL, a full (non-incremental) stream will be estimated.  This
 * is calculated very efficiently.
 *
 * If from is a snapshot, lzc_send_space uses the deadlists attached to
 * each snapshot to efficiently estimate the stream size.
 *
 * If from is a bookmark, the indirect blocks in the destination snapshot
 * are traversed, looking for blocks with a birth time since the creation TXG
 * of the snapshot this bookmark was created from.  This will result in
 * significantly more I/O and be less efficient than a send space estimation
 * on an equivalent snapshot.
 */
int
lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;

	args = fnvlist_alloc();
	if (from != NULL)
		fnvlist_add_string(args, "from", from);
	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
	nvlist_free(args);
	if (err == 0)
		*spacep = fnvlist_lookup_uint64(result, "space");
	nvlist_free(result);
	return (err);
}
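
/*
 * Illustrative sketch (editor's example; names hypothetical): estimating the
 * size of an incremental stream before generating it with lzc_send().
 *
 *	uint64_t space;
 *	int err = lzc_send_space("tank/fs@friday", "tank/fs@monday", &space);
 */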

static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int rv;
	int len = ilen;

	do {
		rv = read(fd, cp, len);
		cp += rv;
		len -= rv;
	} while (rv > 0);

	if (rv < 0 || len != 0)
		return (EIO);

	return (0);
}

/*
 * The simplest receive case: receive from the specified fd, creating the
 * specified snapshot.  Apply the specified properties as "received"
 * properties (which can be overridden by locally-set properties).  If the
 * stream is a clone, its origin snapshot must be specified by 'origin'.
 * The 'force' flag will cause the target filesystem to be rolled back or
 * destroyed if necessary to receive.
 *
 * Return 0 on success or an errno on failure.
 *
 * Note: this interface does not work on dedup'd streams
 * (those with DMU_BACKUP_FEATURE_DEDUP).
 */
int
lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, int fd)
{
	/*
	 * The receive ioctl is still legacy, so we need to construct our own
	 * zfs_cmd_t rather than using lzc_ioctl().
	 */
	zfs_cmd_t zc = { 0 };
	char *atp;
	char *packed = NULL;
	size_t size;
	dmu_replay_record_t drr;
	int error;

	ASSERT3S(g_refcount, >, 0);

	/* zc_name is name of containing filesystem */
	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
	atp = strchr(zc.zc_name, '@');
	if (atp == NULL)
		return (EINVAL);
	*atp = '\0';

	/* if the fs does not exist, try its parent. */
	if (!lzc_exists(zc.zc_name)) {
		char *slashp = strrchr(zc.zc_name, '/');
		if (slashp == NULL)
			return (ENOENT);
		*slashp = '\0';
	}

	/* zc_value is full name of the snapshot to create */
	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));

	if (props != NULL) {
		/* zc_nvlist_src is props to set */
		packed = fnvlist_pack(props, &size);
		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
		zc.zc_nvlist_src_size = size;
	}

	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
	if (origin != NULL)
		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));

	/* zc_begin_record is non-byteswapped BEGIN record */
	error = recv_read(fd, &drr, sizeof (drr));
	if (error != 0)
		goto out;
	zc.zc_begin_record = drr.drr_u.drr_begin;

	/* zc_cookie is fd to read from */
	zc.zc_cookie = fd;

	/* zc_guid is force flag */
	zc.zc_guid = force;

	/* zc_cleanup_fd is unused */
	zc.zc_cleanup_fd = -1;

	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
	if (error != 0)
		error = errno;

out:
	if (packed != NULL)
		fnvlist_pack_free(packed, size);
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (error);
}
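
/*
 * Illustrative sketch (editor's example; snapshot name and "/tmp/stream"
 * path are hypothetical): receiving a stream from a file into a new
 * snapshot, forcing a rollback of the target filesystem if necessary.
 *
 *	int fd = open("/tmp/stream", O_RDONLY);
 *	int err = lzc_receive("tank/copy@friday", NULL, NULL, B_TRUE, fd);
 *	(void) close(fd);
 */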

/*
 * Roll back this filesystem or volume to its most recent snapshot.
 * If snapnamebuf is not NULL, it will be filled in with the name
 * of the most recent snapshot.
 *
 * Return 0 on success or an errno on failure.
 */
int
lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;

	args = fnvlist_alloc();
	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
	nvlist_free(args);
	if (err == 0 && snapnamebuf != NULL) {
		const char *snapname = fnvlist_lookup_string(result, "target");
		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
	}
	return (err);
}
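
/*
 * Illustrative sketch (editor's example; name hypothetical): rolling back
 * and reporting which snapshot was restored.
 *
 *	char snapname[MAXNAMELEN];
 *	if (lzc_rollback("tank/fs", snapname, sizeof (snapname)) == 0)
 *		(void) printf("rolled back to %s\n", snapname);
 */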

/*
 * Creates bookmarks.
 *
 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark")
 * to the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
 * snapshots must be in the same pool.
 *
 * The returned results nvlist will have an entry for each bookmark that
 * failed.  The value will be the (int32) error code.
 *
 * The return value will be 0 if all bookmarks were created, otherwise it will
 * be the errno of an (undetermined) bookmark that failed.
 */
int
lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
{
	nvpair_t *elem;
	int error;
	char pool[MAXNAMELEN];

	/* determine the pool name */
	elem = nvlist_next_nvpair(bookmarks, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/#")] = '\0';

	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);

	return (error);
}
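
/*
 * Illustrative sketch (editor's example; names hypothetical): bookmarking a
 * snapshot so that, per the lzc_send() comment above, it can later serve as
 * an incremental source even after the snapshot itself is destroyed.
 *
 *	nvlist_t *bmarks = fnvlist_alloc();
 *	nvlist_t *errlist;
 *	fnvlist_add_string(bmarks, "tank/fs#monday", "tank/fs@monday");
 *	int error = lzc_bookmark(bmarks, &errlist);
 *	fnvlist_free(bmarks);
 */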

/*
 * Retrieve bookmarks.
 *
 * Retrieve the list of bookmarks for the given file system.  The props
 * parameter is an nvlist of property names (with no values) that will be
 * returned for each bookmark.
 *
 * The following are valid properties on bookmarks, all of which are numbers
 * (represented as uint64 in the nvlist):
 *
 * "guid" - globally unique identifier of the snapshot it refers to
 * "createtxg" - txg when the snapshot it refers to was created
 * "creation" - timestamp when the snapshot it refers to was created
 *
 * The format of the returned nvlist is as follows:
 * <short name of bookmark> -> {
 *     <name of property> -> {
 *         "value" -> uint64
 *     }
 * }
 */
int
lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
{
	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
}
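
/*
 * Illustrative sketch (editor's example; name hypothetical): requesting the
 * "guid" and "createtxg" properties for every bookmark on a filesystem.
 *
 *	nvlist_t *props = fnvlist_alloc();
 *	nvlist_t *bmarks;
 *	fnvlist_add_boolean(props, "guid");
 *	fnvlist_add_boolean(props, "createtxg");
 *	int err = lzc_get_bookmarks("tank/fs", props, &bmarks);
 *	fnvlist_free(props);
 */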

/*
 * Destroys bookmarks.
 *
 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
 * They must all be in the same pool.  Bookmarks are specified as
 * <fs>#<bmark>.
 *
 * Bookmarks that do not exist will be silently ignored.
 *
 * The return value will be 0 if all bookmarks that existed were destroyed.
 *
 * Otherwise the return value will be the errno of an (undetermined) bookmark
 * that failed, no bookmarks will be destroyed, and the errlist will have an
 * entry for each bookmark that failed.  The value in the errlist will be
 * the (int32) error code.
 */
int
lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
{
	nvpair_t *elem;
	int error;
	char pool[MAXNAMELEN];

	/* determine the pool name */
	elem = nvlist_next_nvpair(bmarks, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/#")] = '\0';

	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);

	return (error);
}