1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32 * Copyright (c) 2019 Datto Inc.
33 */
34
35#include <assert.h>
36#include <ctype.h>
37#include <errno.h>
38#include <libintl.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <unistd.h>
43#include <stddef.h>
44#include <fcntl.h>
45#include <sys/mount.h>
46#include <sys/mntent.h>
47#include <sys/mnttab.h>
48#include <sys/avl.h>
49#include <sys/debug.h>
50#include <sys/stat.h>
51#include <pthread.h>
52#include <umem.h>
53#include <time.h>
54
55#include <libzfs.h>
56#include <libzfs_core.h>
57#include <libzutil.h>
58
59#include "zfs_namecheck.h"
60#include "zfs_prop.h"
61#include "zfs_fletcher.h"
62#include "libzfs_impl.h"
63#include <cityhash.h>
64#include <zlib.h>
65#include <sys/zio_checksum.h>
66#include <sys/dsl_crypt.h>
67#include <sys/ddt.h>
68#include <sys/socket.h>
69#include <sys/sha2.h>
70
/*
 * Forward declarations of file-local (static) helpers so they can be
 * referenced before their definitions.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **,
    const char *, nvlist_t *);
static int guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
    uint64_t num_redact_snaps, char *name);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);
79
/*
 * Arguments passed to send_progress_thread(), describing which send to
 * monitor and how its progress should be reported.
 */
typedef struct progress_arg {
	/* dataset handle; its name labels the output and the progress ioctl */
	zfs_handle_t *pa_zhp;
	/* descriptor handed to zfs_send_progress() */
	int pa_fd;
	/* emit tab-separated lines instead of the human-readable table */
	boolean_t pa_parsable;
	/* selects the "BYTES" (rather than " SENT") column heading */
	boolean_t pa_estimate;
	/* values >= 2 add a blocks-visited column */
	int pa_verbosity;
	/* update the process title with the percentage done */
	boolean_t pa_astitle;
	/* print a progress line on every timer tick */
	boolean_t pa_progress;
	/* total size; denominator for the process-title percentage */
	uint64_t pa_size;
} progress_arg_t;
90
91static int
92dump_record(dmu_replay_record_t *drr, void *payload, size_t payload_len,
93    zio_cksum_t *zc, int outfd)
94{
95	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
96	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
97	fletcher_4_incremental_native(drr,
98	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
99	if (drr->drr_type != DRR_BEGIN) {
100		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
101		    drr_checksum.drr_checksum));
102		drr->drr_u.drr_checksum.drr_checksum = *zc;
103	}
104	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
105	    sizeof (zio_cksum_t), zc);
106	if (write(outfd, drr, sizeof (*drr)) == -1)
107		return (errno);
108	if (payload_len != 0) {
109		fletcher_4_incremental_native(payload, payload_len, zc);
110		if (write(outfd, payload, payload_len) == -1)
111			return (errno);
112	}
113	return (0);
114}
115
116/*
117 * Routines for dealing with the AVL tree of fs-nvlists
118 */
/*
 * One entry per snapshot in the guid-ordered AVL tree built by
 * fsavl_create().  fn_nvfs and fn_snapname point into the nvlist the tree
 * was built from; fsavl_destroy() frees only the node itself.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;		/* AVL linkage */
	nvlist_t *fn_nvfs;		/* fs nvlist that lists this snapshot */
	const char *fn_snapname;	/* nvpair name of the snapshot entry */
	uint64_t fn_guid;		/* snapshot guid; the sort key */
} fsavl_node_t;
125
126static int
127fsavl_compare(const void *arg1, const void *arg2)
128{
129	const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
130	const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
131
132	return (TREE_CMP(fn1->fn_guid, fn2->fn_guid));
133}
134
135/*
136 * Given the GUID of a snapshot, find its containing filesystem and
137 * (optionally) name.
138 */
139static nvlist_t *
140fsavl_find(avl_tree_t *avl, uint64_t snapguid, const char **snapname)
141{
142	fsavl_node_t fn_find;
143	fsavl_node_t *fn;
144
145	fn_find.fn_guid = snapguid;
146
147	fn = avl_find(avl, &fn_find, NULL);
148	if (fn) {
149		if (snapname)
150			*snapname = fn->fn_snapname;
151		return (fn->fn_nvfs);
152	}
153	return (NULL);
154}
155
156static void
157fsavl_destroy(avl_tree_t *avl)
158{
159	fsavl_node_t *fn;
160	void *cookie;
161
162	if (avl == NULL)
163		return;
164
165	cookie = NULL;
166	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
167		free(fn);
168	avl_destroy(avl);
169	free(avl);
170}
171
172/*
173 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
174 */
175static avl_tree_t *
176fsavl_create(nvlist_t *fss)
177{
178	avl_tree_t *fsavl;
179	nvpair_t *fselem = NULL;
180
181	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
182		return (NULL);
183
184	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
185	    offsetof(fsavl_node_t, fn_node));
186
187	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
188		nvlist_t *nvfs, *snaps;
189		nvpair_t *snapelem = NULL;
190
191		nvfs = fnvpair_value_nvlist(fselem);
192		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
193
194		while ((snapelem =
195		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
196			fsavl_node_t *fn;
197
198			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
199				fsavl_destroy(fsavl);
200				return (NULL);
201			}
202			fn->fn_nvfs = nvfs;
203			fn->fn_snapname = nvpair_name(snapelem);
204			fn->fn_guid = fnvpair_value_uint64(snapelem);
205
206			/*
207			 * Note: if there are multiple snaps with the
208			 * same GUID, we ignore all but one.
209			 */
210			avl_index_t where = 0;
211			if (avl_find(fsavl, fn, &where) == NULL)
212				avl_insert(fsavl, fn, where);
213			else
214				free(fn);
215		}
216	}
217
218	return (fsavl);
219}
220
221/*
222 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
223 */
/*
 * Traversal state shared by send_iterate_fs() and send_iterate_snap() while
 * gather_nvlist() builds the nvlist of filesystems ("fss").
 */
typedef struct send_data {
	/*
	 * assigned inside every recursive call,
	 * restored from *_save on return:
	 *
	 * guid of fromsnap snapshot in parent dataset
	 * txg of fromsnap snapshot in current dataset
	 * txg of tosnap snapshot in current dataset
	 */

	uint64_t parent_fromsnap_guid;
	uint64_t fromsnap_txg;
	uint64_t tosnap_txg;

	/* the nvlists get accumulated during depth-first traversal */
	nvlist_t *parent_snaps;
	nvlist_t *fss;
	nvlist_t *snapprops;
	nvlist_t *snapholds;	/* user holds */

	/* send-receive configuration, does not change during traversal */
	const char *fsname;
	const char *fromsnap;
	const char *tosnap;
	boolean_t recursive;
	boolean_t raw;
	boolean_t doall;
	boolean_t replicate;
	boolean_t skipmissing;
	boolean_t verbose;
	boolean_t backup;
	/* set by send_iterate_snap() once the from/to snapshot was visited */
	boolean_t seenfrom;
	boolean_t seento;
	boolean_t holds;	/* were holds requested with send -h */
	boolean_t props;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *	 "snapholds" -> { name (lastname) -> { holdname -> crtime } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "is_encroot" -> boolean
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
285
/* Forward declaration; send_iterate_snap() calls this before its definition. */
static void
send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv);
288
289/*
290 * Collect guid, valid props, optionally holds, etc. of a snapshot.
291 * This interface is intended for use as a zfs_iter_snapshots_v2_sorted visitor.
292 */
293static int
294send_iterate_snap(zfs_handle_t *zhp, void *arg)
295{
296	send_data_t *sd = arg;
297	uint64_t guid = zhp->zfs_dmustats.dds_guid;
298	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
299	boolean_t isfromsnap, istosnap, istosnapwithnofrom;
300	char *snapname;
301	const char *from = sd->fromsnap;
302	const char *to = sd->tosnap;
303
304	snapname = strrchr(zhp->zfs_name, '@');
305	assert(snapname != NULL);
306	++snapname;
307
308	isfromsnap = (from != NULL && strcmp(from, snapname) == 0);
309	istosnap = (to != NULL && strcmp(to, snapname) == 0);
310	istosnapwithnofrom = (istosnap && from == NULL);
311
312	if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
313		if (sd->verbose) {
314			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
315			    "skipping snapshot %s because it was created "
316			    "after the destination snapshot (%s)\n"),
317			    zhp->zfs_name, to);
318		}
319		zfs_close(zhp);
320		return (0);
321	}
322
323	fnvlist_add_uint64(sd->parent_snaps, snapname, guid);
324
325	/*
326	 * NB: if there is no fromsnap here (it's a newly created fs in
327	 * an incremental replication), we will substitute the tosnap.
328	 */
329	if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap))
330		sd->parent_fromsnap_guid = guid;
331
332	if (!sd->recursive) {
333		/*
334		 * To allow a doall stream to work properly
335		 * with a NULL fromsnap
336		 */
337		if (sd->doall && from == NULL && !sd->seenfrom)
338			sd->seenfrom = B_TRUE;
339
340		if (!sd->seenfrom && isfromsnap) {
341			sd->seenfrom = B_TRUE;
342			zfs_close(zhp);
343			return (0);
344		}
345
346		if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
347			zfs_close(zhp);
348			return (0);
349		}
350
351		if (istosnap)
352			sd->seento = B_TRUE;
353	}
354
355	nvlist_t *nv = fnvlist_alloc();
356	send_iterate_prop(zhp, sd->backup, nv);
357	fnvlist_add_nvlist(sd->snapprops, snapname, nv);
358	fnvlist_free(nv);
359
360	if (sd->holds) {
361		nvlist_t *holds;
362		if (lzc_get_holds(zhp->zfs_name, &holds) == 0) {
363			fnvlist_add_nvlist(sd->snapholds, snapname, holds);
364			fnvlist_free(holds);
365		}
366	}
367
368	zfs_close(zhp);
369	return (0);
370}
371
372/*
373 * Collect all valid props from the handle snap into an nvlist.
374 */
375static void
376send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
377{
378	nvlist_t *props;
379
380	if (received_only)
381		props = zfs_get_recvd_props(zhp);
382	else
383		props = zhp->zfs_props;
384
385	nvpair_t *elem = NULL;
386	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
387		const char *propname = nvpair_name(elem);
388		zfs_prop_t prop = zfs_name_to_prop(propname);
389
390		if (!zfs_prop_user(propname)) {
391			/*
392			 * Realistically, this should never happen.  However,
393			 * we want the ability to add DSL properties without
394			 * needing to make incompatible version changes.  We
395			 * need to ignore unknown properties to allow older
396			 * software to still send datasets containing these
397			 * properties, with the unknown properties elided.
398			 */
399			if (prop == ZPROP_INVAL)
400				continue;
401
402			if (zfs_prop_readonly(prop))
403				continue;
404		}
405
406		nvlist_t *propnv = fnvpair_value_nvlist(elem);
407
408		boolean_t isspacelimit = (prop == ZFS_PROP_QUOTA ||
409		    prop == ZFS_PROP_RESERVATION ||
410		    prop == ZFS_PROP_REFQUOTA ||
411		    prop == ZFS_PROP_REFRESERVATION);
412		if (isspacelimit && zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
413			continue;
414
415		const char *source;
416		if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) == 0) {
417			if (strcmp(source, zhp->zfs_name) != 0 &&
418			    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
419				continue;
420		} else {
421			/*
422			 * May have no source before SPA_VERSION_RECVD_PROPS,
423			 * but is still modifiable.
424			 */
425			if (!isspacelimit)
426				continue;
427		}
428
429		if (zfs_prop_user(propname) ||
430		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
431			const char *value;
432			value = fnvlist_lookup_string(propnv, ZPROP_VALUE);
433			fnvlist_add_string(nv, propname, value);
434		} else {
435			uint64_t value;
436			value = fnvlist_lookup_uint64(propnv, ZPROP_VALUE);
437			fnvlist_add_uint64(nv, propname, value);
438		}
439	}
440}
441
442/*
443 * returns snapshot guid
444 * and returns 0 if the snapshot does not exist
445 */
446static uint64_t
447get_snap_guid(libzfs_handle_t *hdl, const char *fs, const char *snap)
448{
449	char name[MAXPATHLEN + 1];
450	uint64_t guid = 0;
451
452	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
453		return (guid);
454
455	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
456	zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
457	if (zhp != NULL) {
458		guid = zfs_prop_get_int(zhp, ZFS_PROP_GUID);
459		zfs_close(zhp);
460	}
461
462	return (guid);
463}
464
465/*
466 * returns snapshot creation txg
467 * and returns 0 if the snapshot does not exist
468 */
469static uint64_t
470get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
471{
472	char name[ZFS_MAX_DATASET_NAME_LEN];
473	uint64_t txg = 0;
474
475	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
476		return (txg);
477
478	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
479	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
480		zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
481		if (zhp != NULL) {
482			txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
483			zfs_close(zhp);
484		}
485	}
486
487	return (txg);
488}
489
/*
 * Recursively generate nvlists describing datasets.  See comment
 * for the data structure send_data_t above for description of contents
 * of the nvlist.
 *
 * zhp is the dataset to describe and arg is the send_data_t carrying the
 * traversal state.  On success an entry keyed by the dataset's guid (as a
 * "0x..." hex string) is added to sd->fss, and, when sd->recursive is set,
 * the function is applied to each child filesystem.
 *
 * zhp is always closed before returning.  Returns 0 on success (including
 * the cases where the dataset is deliberately skipped), otherwise
 * EZFS_NOENT, EZFS_NOSPC, -1, or the value returned by the child iteration.
 */
static int
send_iterate_fs(zfs_handle_t *zhp, void *arg)
{
	send_data_t *sd = arg;
	nvlist_t *nvfs = NULL, *nv = NULL;
	int rv = 0;
	uint64_t min_txg = 0, max_txg = 0;
	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
	uint64_t guid = zhp->zfs_dmustats.dds_guid;
	uint64_t fromsnap_txg, tosnap_txg;
	char guidstring[64];

	/* These fields are restored on return from a recursive call. */
	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
	uint64_t fromsnap_txg_save = sd->fromsnap_txg;
	uint64_t tosnap_txg_save = sd->tosnap_txg;

	/*
	 * If this dataset has its own fromsnap/tosnap, use their txgs;
	 * otherwise the values inherited from the parent stay in effect.
	 */
	fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
	if (fromsnap_txg != 0)
		sd->fromsnap_txg = fromsnap_txg;

	tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
	if (tosnap_txg != 0)
		sd->tosnap_txg = tosnap_txg;

	/*
	 * On the send side, if the current dataset does not have tosnap,
	 * perform two additional checks:
	 *
	 * - Skip sending the current dataset if it was created later than
	 *   the parent tosnap.
	 * - Return error if the current dataset was created earlier than
	 *   the parent tosnap, unless --skip-missing specified. Then
	 *   just print a warning.
	 */
	if (sd->tosnap != NULL && tosnap_txg == 0) {
		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
			if (sd->verbose) {
				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
				    "skipping dataset %s: snapshot %s does "
				    "not exist\n"), zhp->zfs_name, sd->tosnap);
			}
		} else if (sd->skipmissing) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "WARNING: skipping dataset %s and its children:"
			    " snapshot %s does not exist\n"),
			    zhp->zfs_name, sd->tosnap);
		} else {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s%s: snapshot %s@%s does not "
			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
			    dgettext(TEXT_DOMAIN, " recursively") : "",
			    zhp->zfs_name, sd->tosnap);
			rv = EZFS_NOENT;
		}
		goto out;
	}

	nvfs = fnvlist_alloc();
	fnvlist_add_string(nvfs, "name", zhp->zfs_name);
	fnvlist_add_uint64(nvfs, "parentfromsnap", sd->parent_fromsnap_guid);

	/* For a clone, record the guid of its origin snapshot. */
	if (zhp->zfs_dmustats.dds_origin[0] != '\0') {
		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
		if (origin == NULL) {
			rv = -1;
			goto out;
		}
		fnvlist_add_uint64(nvfs, "origin",
		    origin->zfs_dmustats.dds_guid);
		zfs_close(origin);
	}

	/* Iterate over props. */
	if (sd->props || sd->backup || sd->recursive) {
		nv = fnvlist_alloc();
		send_iterate_prop(zhp, sd->backup, nv);
		fnvlist_add_nvlist(nvfs, "props", nv);
	}
	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
		boolean_t encroot;

		/* Determine if this dataset is an encryption root. */
		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
			rv = -1;
			goto out;
		}

		if (encroot)
			fnvlist_add_boolean(nvfs, "is_encroot");

		/*
		 * Encrypted datasets can only be sent with properties if
		 * the raw flag is specified because the receive side doesn't
		 * currently have a mechanism for recursively asking the user
		 * for new encryption parameters.
		 */
		if (!sd->raw) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s: encrypted dataset %s may not "
			    "be sent with properties without the raw flag\n"),
			    sd->fsname, sd->tosnap, zhp->zfs_name);
			rv = -1;
			goto out;
		}

	}

	/*
	 * Iterate over snaps, and set sd->parent_fromsnap_guid.
	 *
	 * If this is a "doall" send, a replicate send or we're just trying
	 * to gather a list of previous snapshots, iterate through all the
	 * snaps in the txg range. Otherwise just look at the one we're
	 * interested in.
	 */
	sd->parent_fromsnap_guid = 0;
	sd->parent_snaps = fnvlist_alloc();
	sd->snapprops = fnvlist_alloc();
	if (sd->holds)
		sd->snapholds = fnvlist_alloc();
	if (sd->doall || sd->replicate || sd->tosnap == NULL) {
		/* A replicate send leaves the txg range unbounded (0, 0). */
		if (!sd->replicate && fromsnap_txg != 0)
			min_txg = fromsnap_txg;
		if (!sd->replicate && tosnap_txg != 0)
			max_txg = tosnap_txg;
		(void) zfs_iter_snapshots_sorted_v2(zhp, 0, send_iterate_snap,
		    sd, min_txg, max_txg);
	} else {
		char snapname[MAXPATHLEN] = { 0 };
		zfs_handle_t *snap;

		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
		    zhp->zfs_name, sd->tosnap);
		if (sd->fromsnap != NULL)
			sd->seenfrom = B_TRUE;
		snap = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
		/* send_iterate_snap() closes the snapshot handle. */
		if (snap != NULL)
			(void) send_iterate_snap(snap, sd);
	}

	/*
	 * fnvlist_add_nvlist() is given the per-dataset lists and they are
	 * freed immediately afterwards, so nvfs does not depend on them.
	 */
	fnvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps);
	fnvlist_free(sd->parent_snaps);
	fnvlist_add_nvlist(nvfs, "snapprops", sd->snapprops);
	fnvlist_free(sd->snapprops);
	if (sd->holds) {
		fnvlist_add_nvlist(nvfs, "snapholds", sd->snapholds);
		fnvlist_free(sd->snapholds);
	}

	/* Do not allow the size of the properties list to exceed the limit */
	if ((fnvlist_size(nvfs) + fnvlist_size(sd->fss)) >
	    zhp->zfs_hdl->libzfs_max_nvlist) {
		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
		    "warning: cannot send %s@%s: the size of the list of "
		    "snapshots and properties is too large to be received "
		    "successfully.\n"
		    "Select a smaller number of snapshots to send.\n"),
		    zhp->zfs_name, sd->tosnap);
		rv = EZFS_NOSPC;
		goto out;
	}
	/* Add this fs to nvlist. */
	(void) snprintf(guidstring, sizeof (guidstring),
	    "0x%llx", (longlong_t)guid);
	fnvlist_add_nvlist(sd->fss, guidstring, nvfs);

	/* Iterate over children. */
	if (sd->recursive)
		rv = zfs_iter_filesystems_v2(zhp, 0, send_iterate_fs, sd);

out:
	/* Restore saved fields. */
	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
	sd->fromsnap_txg = fromsnap_txg_save;
	sd->tosnap_txg = tosnap_txg_save;

	/* Both may still be NULL here if we bailed out early. */
	fnvlist_free(nv);
	fnvlist_free(nvfs);

	zfs_close(zhp);
	return (rv);
}
679
680static int
681gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
682    const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t doall,
683    boolean_t replicate, boolean_t skipmissing, boolean_t verbose,
684    boolean_t backup, boolean_t holds, boolean_t props, nvlist_t **nvlp,
685    avl_tree_t **avlp)
686{
687	zfs_handle_t *zhp;
688	send_data_t sd = { 0 };
689	int error;
690
691	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
692	if (zhp == NULL)
693		return (EZFS_BADTYPE);
694
695	sd.fss = fnvlist_alloc();
696	sd.fsname = fsname;
697	sd.fromsnap = fromsnap;
698	sd.tosnap = tosnap;
699	sd.recursive = recursive;
700	sd.raw = raw;
701	sd.doall = doall;
702	sd.replicate = replicate;
703	sd.skipmissing = skipmissing;
704	sd.verbose = verbose;
705	sd.backup = backup;
706	sd.holds = holds;
707	sd.props = props;
708
709	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
710		fnvlist_free(sd.fss);
711		if (avlp != NULL)
712			*avlp = NULL;
713		*nvlp = NULL;
714		return (error);
715	}
716
717	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
718		fnvlist_free(sd.fss);
719		*nvlp = NULL;
720		return (EZFS_NOMEM);
721	}
722
723	*nvlp = sd.fss;
724	return (0);
725}
726
727/*
728 * Routines specific to "zfs send"
729 */
/*
 * State for the "zfs send" dump routines.  Only some of the fields are used
 * by code visible in this part of the file; the others are left
 * undocumented here rather than guessed at.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t dryrun, parsable, progress, embed_data, std_out;
	boolean_t large_block, compress, raw, holds;
	boolean_t progressastitle;
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/* snapshot name -> holdtag, filled by gather_holds(); may be NULL */
	nvlist_t *snapholds;
	avl_tree_t *fsavl;
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	nvlist_t *debugnv;
	/* tag string that gather_holds() records for each snapshot */
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
	int cleanup_fd;
	int verbosity;
	uint64_t size;
} send_dump_data_t;
753
754static int
755zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
756    enum lzc_send_flags flags, uint64_t *spacep)
757{
758	assert(snapname != NULL);
759
760	int error = lzc_send_space(snapname, from, flags, spacep);
761	if (error == 0)
762		return (0);
763
764	char errbuf[ERRBUFLEN];
765	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
766	    "warning: cannot estimate space for '%s'"), snapname);
767
768	libzfs_handle_t *hdl = zhp->zfs_hdl;
769	switch (error) {
770	case EXDEV:
771		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
772		    "not an earlier snapshot from the same fs"));
773		return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
774
775	case ENOENT:
776		if (zfs_dataset_exists(hdl, snapname,
777		    ZFS_TYPE_SNAPSHOT)) {
778			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
779			    "incremental source (%s) does not exist"),
780			    snapname);
781		}
782		return (zfs_error(hdl, EZFS_NOENT, errbuf));
783
784	case EDQUOT:
785	case EFBIG:
786	case EIO:
787	case ENOLINK:
788	case ENOSPC:
789	case ENOSTR:
790	case ENXIO:
791	case EPIPE:
792	case ERANGE:
793	case EFAULT:
794	case EROFS:
795	case EINVAL:
796		zfs_error_aux(hdl, "%s", zfs_strerror(error));
797		return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
798
799	default:
800		return (zfs_standard_error(hdl, error, errbuf));
801	}
802}
803
804/*
805 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
806 * NULL) to the file descriptor specified by outfd.
807 */
808static int
809dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
810    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
811    nvlist_t *debugnv)
812{
813	zfs_cmd_t zc = {"\0"};
814	libzfs_handle_t *hdl = zhp->zfs_hdl;
815	nvlist_t *thisdbg;
816
817	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
818	assert(fromsnap_obj == 0 || !fromorigin);
819
820	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
821	zc.zc_cookie = outfd;
822	zc.zc_obj = fromorigin;
823	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
824	zc.zc_fromobj = fromsnap_obj;
825	zc.zc_flags = flags;
826
827	if (debugnv != NULL) {
828		thisdbg = fnvlist_alloc();
829		if (fromsnap != NULL && fromsnap[0] != '\0')
830			fnvlist_add_string(thisdbg, "fromsnap", fromsnap);
831	}
832
833	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
834		char errbuf[ERRBUFLEN];
835		int error = errno;
836
837		(void) snprintf(errbuf, sizeof (errbuf), "%s '%s'",
838		    dgettext(TEXT_DOMAIN, "warning: cannot send"),
839		    zhp->zfs_name);
840
841		if (debugnv != NULL) {
842			fnvlist_add_uint64(thisdbg, "error", error);
843			fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
844			fnvlist_free(thisdbg);
845		}
846
847		switch (error) {
848		case EXDEV:
849			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
850			    "not an earlier snapshot from the same fs"));
851			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
852
853		case EACCES:
854			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
855			    "source key must be loaded"));
856			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
857
858		case ENOENT:
859			if (zfs_dataset_exists(hdl, zc.zc_name,
860			    ZFS_TYPE_SNAPSHOT)) {
861				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
862				    "incremental source (@%s) does not exist"),
863				    zc.zc_value);
864			}
865			return (zfs_error(hdl, EZFS_NOENT, errbuf));
866
867		case EDQUOT:
868		case EFBIG:
869		case EIO:
870		case ENOLINK:
871		case ENOSPC:
872		case ENOSTR:
873		case ENXIO:
874		case EPIPE:
875		case ERANGE:
876		case EFAULT:
877		case EROFS:
878		case EINVAL:
879			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
880			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
881
882		default:
883			return (zfs_standard_error(hdl, errno, errbuf));
884		}
885	}
886
887	if (debugnv != NULL) {
888		fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
889		fnvlist_free(thisdbg);
890	}
891
892	return (0);
893}
894
895static void
896gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
897{
898	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
899
900	/*
901	 * zfs_send() only sets snapholds for sends that need them,
902	 * e.g. replication and doall.
903	 */
904	if (sdd->snapholds == NULL)
905		return;
906
907	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
908}
909
910int
911zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
912    uint64_t *blocks_visited)
913{
914	zfs_cmd_t zc = {"\0"};
915
916	if (bytes_written != NULL)
917		*bytes_written = 0;
918	if (blocks_visited != NULL)
919		*blocks_visited = 0;
920	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
921	zc.zc_cookie = fd;
922	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
923		return (errno);
924	if (bytes_written != NULL)
925		*bytes_written = zc.zc_cookie;
926	if (blocks_visited != NULL)
927		*blocks_visited = zc.zc_objset_type;
928	return (0);
929}
930
931static volatile boolean_t send_progress_thread_signal_duetotimer;
932static void
933send_progress_thread_act(int sig, siginfo_t *info, void *ucontext)
934{
935	(void) sig, (void) ucontext;
936	send_progress_thread_signal_duetotimer = info->si_code == SI_TIMER;
937}
938
/*
 * A timer plus a flag saying whether it was actually requested;
 * timer_delete_cleanup() only deletes the timer when `desired` is set.
 */
struct timer_desirability {
	timer_t timer;
	boolean_t desired;
};
943static void
944timer_delete_cleanup(void *timer)
945{
946	struct timer_desirability *td = timer;
947	if (td->desired)
948		timer_delete(td->timer);
949}
950
/*
 * SEND_PROGRESS_THREAD_PARENT_BLOCK(old) blocks SIGUSR1, and SIGINFO on
 * platforms that define it, in the calling thread with pthread_sigmask(),
 * storing the previous mask through `old`.  These are the signals for which
 * send_progress_thread() installs handlers.  The saved mask is what
 * send_progress_thread_exit() later restores.
 */
#ifdef SIGINFO
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO sigaddset(&new, SIGINFO)
#else
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO
#endif
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK(old) { \
	sigset_t new; \
	sigemptyset(&new); \
	sigaddset(&new, SIGUSR1); \
	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO; \
	pthread_sigmask(SIG_BLOCK, &new, old); \
}
963
/*
 * Thread body that reports the progress of an in-flight send.
 *
 * arg is a progress_arg_t.  The thread installs a handler for SIGUSR1 (and
 * SIGINFO where defined), optionally arms a one-second periodic timer that
 * raises SIGUSR1, and then loops: wait for a signal, query
 * zfs_send_progress(), and print a line to stderr and/or update the process
 * title according to the flags in arg.
 *
 * The loop only ends through pthread_exit() when zfs_send_progress() fails
 * (EINTR and ENOENT are reported as 0), or through cancellation by
 * send_progress_thread_exit().  The thread's exit value is the error number
 * cast to a pointer.
 */
static void *
send_progress_thread(void *arg)
{
	progress_arg_t *pa = arg;
	zfs_handle_t *zhp = pa->pa_zhp;
	uint64_t bytes;
	uint64_t blocks;
	/* One percent of the total, so bytes / total yields a percentage. */
	uint64_t total = pa->pa_size / 100;
	char buf[16];
	time_t t;
	struct tm tm;
	int err;

	const struct sigaction signal_action =
	    {.sa_sigaction = send_progress_thread_act, .sa_flags = SA_SIGINFO};
	struct sigevent timer_cfg =
	    {.sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGUSR1};
	const struct itimerspec timer_time =
	    {.it_value = {.tv_sec = 1}, .it_interval = {.tv_sec = 1}};
	struct timer_desirability timer = {};

	sigaction(SIGUSR1, &signal_action, NULL);
#ifdef SIGINFO
	sigaction(SIGINFO, &signal_action, NULL);
#endif

	/*
	 * The periodic timer is only needed when something must be updated
	 * every second; otherwise output happens only on an explicit signal.
	 */
	if ((timer.desired = pa->pa_progress || pa->pa_astitle)) {
		if (timer_create(CLOCK_MONOTONIC, &timer_cfg, &timer.timer))
			return ((void *)(uintptr_t)errno);
		(void) timer_settime(timer.timer, 0, &timer_time, NULL);
	}
	/* From here on the timer is deleted when the thread exits. */
	pthread_cleanup_push(timer_delete_cleanup, &timer);

	/* Column headings for the human-readable per-second table. */
	if (!pa->pa_parsable && pa->pa_progress) {
		(void) fprintf(stderr,
		    "TIME       %s   %sSNAPSHOT %s\n",
		    pa->pa_estimate ? "BYTES" : " SENT",
		    pa->pa_verbosity >= 2 ? "   BLOCKS    " : "",
		    zhp->zfs_name);
	}

	/*
	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
	 */
	for (;;) {
		/* Sleep until a signal (timer tick or user request) arrives. */
		pause();
		if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
		    &blocks)) != 0) {
			/* EINTR and ENOENT are not reported as failures. */
			if (err == EINTR || err == ENOENT)
				err = 0;
			pthread_exit(((void *)(uintptr_t)err));
		}

		(void) time(&t);
		localtime_r(&t, &tm);

		if (pa->pa_astitle) {
			char buf_bytes[16];
			char buf_size[16];
			int pct;
			zfs_nicenum(bytes, buf_bytes, sizeof (buf_bytes));
			zfs_nicenum(pa->pa_size, buf_size, sizeof (buf_size));
			/* With no usable size estimate, show 100%. */
			pct = (total > 0) ? bytes / total : 100;
			zfs_setproctitle("sending %s (%d%%: %s/%s)",
			    zhp->zfs_name, MIN(pct, 100), buf_bytes, buf_size);
		}

		/*
		 * Choose the output format: verbose adds a block count,
		 * parsable uses tabs and raw numbers.  The plain format is
		 * printed on every tick only with pa_progress; otherwise
		 * only when the signal did not come from the timer.
		 */
		if (pa->pa_verbosity >= 2 && pa->pa_parsable) {
			(void) fprintf(stderr,
			    "%02d:%02d:%02d\t%llu\t%llu\t%s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    (u_longlong_t)bytes, (u_longlong_t)blocks,
			    zhp->zfs_name);
		} else if (pa->pa_verbosity >= 2) {
			zfs_nicenum(bytes, buf, sizeof (buf));
			(void) fprintf(stderr,
			    "%02d:%02d:%02d   %5s    %8llu    %s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    buf, (u_longlong_t)blocks, zhp->zfs_name);
		} else if (pa->pa_parsable) {
			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    (u_longlong_t)bytes, zhp->zfs_name);
		} else if (pa->pa_progress ||
		    !send_progress_thread_signal_duetotimer) {
			zfs_nicebytes(bytes, buf, sizeof (buf));
			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    buf, zhp->zfs_name);
		}
	}
	/* Not reached; this pairs with pthread_cleanup_push() above. */
	pthread_cleanup_pop(B_TRUE);
	return (NULL);
}
1058
1059static boolean_t
1060send_progress_thread_exit(
1061    libzfs_handle_t *hdl, pthread_t ptid, sigset_t *oldmask)
1062{
1063	void *status = NULL;
1064	(void) pthread_cancel(ptid);
1065	(void) pthread_join(ptid, &status);
1066	pthread_sigmask(SIG_SETMASK, oldmask, NULL);
1067	int error = (int)(uintptr_t)status;
1068	if (error != 0 && status != PTHREAD_CANCELED)
1069		return (zfs_standard_error(hdl, error,
1070		    dgettext(TEXT_DOMAIN, "progress thread exited nonzero")));
1071	else
1072		return (B_FALSE);
1073}
1074
1075static void
1076send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1077    uint64_t size, boolean_t parsable)
1078{
1079	if (parsable) {
1080		if (fromsnap != NULL) {
1081			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1082			    "incremental\t%s\t%s"), fromsnap, tosnap);
1083		} else {
1084/*
1085 * Workaround for GCC 12+ with UBSan enabled deficencies.
1086 *
1087 * GCC 12+ invoked with -fsanitize=undefined incorrectly reports the code
1088 * below as violating -Wformat-overflow.
1089 */
1090#if defined(__GNUC__) && !defined(__clang__) && \
1091	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1092#pragma GCC diagnostic push
1093#pragma GCC diagnostic ignored "-Wformat-overflow"
1094#endif
1095			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1096			    "full\t%s"), tosnap);
1097#if defined(__GNUC__) && !defined(__clang__) && \
1098	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1099#pragma GCC diagnostic pop
1100#endif
1101		}
1102		(void) fprintf(fout, "\t%llu", (longlong_t)size);
1103	} else {
1104		if (fromsnap != NULL) {
1105			if (strchr(fromsnap, '@') == NULL &&
1106			    strchr(fromsnap, '#') == NULL) {
1107				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1108				    "send from @%s to %s"), fromsnap, tosnap);
1109			} else {
1110				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1111				    "send from %s to %s"), fromsnap, tosnap);
1112			}
1113		} else {
1114			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1115			    "full send of %s"), tosnap);
1116		}
1117		if (size != 0) {
1118			char buf[16];
1119			zfs_nicebytes(size, buf, sizeof (buf));
1120/*
1121 * Workaround for GCC 12+ with UBSan enabled deficencies.
1122 *
1123 * GCC 12+ invoked with -fsanitize=undefined incorrectly reports the code
1124 * below as violating -Wformat-overflow.
1125 */
1126#if defined(__GNUC__) && !defined(__clang__) && \
1127	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1128#pragma GCC diagnostic push
1129#pragma GCC diagnostic ignored "-Wformat-overflow"
1130#endif
1131			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1132			    " estimated size is %s"), buf);
1133#if defined(__GNUC__) && !defined(__clang__) && \
1134	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1135#pragma GCC diagnostic pop
1136#endif
1137		}
1138	}
1139	(void) fprintf(fout, "\n");
1140}
1141
1142/*
1143 * Send a single filesystem snapshot, updating the send dump data.
1144 * This interface is intended for use as a zfs_iter_snapshots_v2_sorted visitor.
1145 */
1146static int
1147dump_snapshot(zfs_handle_t *zhp, void *arg)
1148{
1149	send_dump_data_t *sdd = arg;
1150	progress_arg_t pa = { 0 };
1151	pthread_t tid;
1152	char *thissnap;
1153	enum lzc_send_flags flags = 0;
1154	int err;
1155	boolean_t isfromsnap, istosnap, fromorigin;
1156	boolean_t exclude = B_FALSE;
1157	FILE *fout = sdd->std_out ? stdout : stderr;
1158
1159	err = 0;
1160	thissnap = strchr(zhp->zfs_name, '@') + 1;
1161	isfromsnap = (sdd->fromsnap != NULL &&
1162	    strcmp(sdd->fromsnap, thissnap) == 0);
1163
1164	if (!sdd->seenfrom && isfromsnap) {
1165		gather_holds(zhp, sdd);
1166		sdd->seenfrom = B_TRUE;
1167		(void) strlcpy(sdd->prevsnap, thissnap, sizeof (sdd->prevsnap));
1168		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1169		zfs_close(zhp);
1170		return (0);
1171	}
1172
1173	if (sdd->seento || !sdd->seenfrom) {
1174		zfs_close(zhp);
1175		return (0);
1176	}
1177
1178	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1179	if (istosnap)
1180		sdd->seento = B_TRUE;
1181
1182	if (sdd->large_block)
1183		flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1184	if (sdd->embed_data)
1185		flags |= LZC_SEND_FLAG_EMBED_DATA;
1186	if (sdd->compress)
1187		flags |= LZC_SEND_FLAG_COMPRESS;
1188	if (sdd->raw)
1189		flags |= LZC_SEND_FLAG_RAW;
1190
1191	if (!sdd->doall && !isfromsnap && !istosnap) {
1192		if (sdd->replicate) {
1193			const char *snapname;
1194			nvlist_t *snapprops;
1195			/*
1196			 * Filter out all intermediate snapshots except origin
1197			 * snapshots needed to replicate clones.
1198			 */
1199			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1200			    zhp->zfs_dmustats.dds_guid, &snapname);
1201
1202			if (nvfs != NULL) {
1203				snapprops = fnvlist_lookup_nvlist(nvfs,
1204				    "snapprops");
1205				snapprops = fnvlist_lookup_nvlist(snapprops,
1206				    thissnap);
1207				exclude = !nvlist_exists(snapprops,
1208				    "is_clone_origin");
1209			}
1210		} else {
1211			exclude = B_TRUE;
1212		}
1213	}
1214
1215	/*
1216	 * If a filter function exists, call it to determine whether
1217	 * this snapshot will be sent.
1218	 */
1219	if (exclude || (sdd->filter_cb != NULL &&
1220	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1221		/*
1222		 * This snapshot is filtered out.  Don't send it, and don't
1223		 * set prevsnap_obj, so it will be as if this snapshot didn't
1224		 * exist, and the next accepted snapshot will be sent as
1225		 * an incremental from the last accepted one, or as the
1226		 * first (and full) snapshot in the case of a replication,
1227		 * non-incremental send.
1228		 */
1229		zfs_close(zhp);
1230		return (0);
1231	}
1232
1233	gather_holds(zhp, sdd);
1234	fromorigin = sdd->prevsnap[0] == '\0' &&
1235	    (sdd->fromorigin || sdd->replicate);
1236
1237	if (sdd->verbosity != 0) {
1238		uint64_t size = 0;
1239		char fromds[ZFS_MAX_DATASET_NAME_LEN];
1240
1241		if (sdd->prevsnap[0] != '\0') {
1242			(void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds));
1243			*(strchr(fromds, '@') + 1) = '\0';
1244			(void) strlcat(fromds, sdd->prevsnap, sizeof (fromds));
1245		}
1246		if (zfs_send_space(zhp, zhp->zfs_name,
1247		    sdd->prevsnap[0] ? fromds : NULL, flags, &size) == 0) {
1248			send_print_verbose(fout, zhp->zfs_name,
1249			    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1250			    size, sdd->parsable);
1251			sdd->size += size;
1252		}
1253	}
1254
1255	if (!sdd->dryrun) {
1256		/*
1257		 * If progress reporting is requested, spawn a new thread to
1258		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1259		 */
1260		sigset_t oldmask;
1261		{
1262			pa.pa_zhp = zhp;
1263			pa.pa_fd = sdd->outfd;
1264			pa.pa_parsable = sdd->parsable;
1265			pa.pa_estimate = B_FALSE;
1266			pa.pa_verbosity = sdd->verbosity;
1267			pa.pa_size = sdd->size;
1268			pa.pa_astitle = sdd->progressastitle;
1269			pa.pa_progress = sdd->progress;
1270
1271			if ((err = pthread_create(&tid, NULL,
1272			    send_progress_thread, &pa)) != 0) {
1273				zfs_close(zhp);
1274				return (err);
1275			}
1276			SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1277		}
1278
1279		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1280		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1281
1282		if (send_progress_thread_exit(zhp->zfs_hdl, tid, &oldmask))
1283			return (-1);
1284	}
1285
1286	(void) strlcpy(sdd->prevsnap, thissnap, sizeof (sdd->prevsnap));
1287	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1288	zfs_close(zhp);
1289	return (err);
1290}
1291
1292/*
1293 * Send all snapshots for a filesystem, updating the send dump data.
1294 */
1295static int
1296dump_filesystem(zfs_handle_t *zhp, send_dump_data_t *sdd)
1297{
1298	int rv = 0;
1299	boolean_t missingfrom = B_FALSE;
1300	zfs_cmd_t zc = {"\0"};
1301	uint64_t min_txg = 0, max_txg = 0;
1302
1303	/*
1304	 * Make sure the tosnap exists.
1305	 */
1306	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1307	    zhp->zfs_name, sdd->tosnap);
1308	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1309		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1310		    "WARNING: could not send %s@%s: does not exist\n"),
1311		    zhp->zfs_name, sdd->tosnap);
1312		sdd->err = B_TRUE;
1313		return (0);
1314	}
1315
1316	/*
1317	 * If this fs does not have fromsnap, and we're doing
1318	 * recursive, we need to send a full stream from the
1319	 * beginning (or an incremental from the origin if this
1320	 * is a clone).  If we're doing non-recursive, then let
1321	 * them get the error.
1322	 */
1323	if (sdd->replicate && sdd->fromsnap) {
1324		/*
1325		 * Make sure the fromsnap exists.
1326		 */
1327		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1328		    zhp->zfs_name, sdd->fromsnap);
1329		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0)
1330			missingfrom = B_TRUE;
1331	}
1332
1333	sdd->seenfrom = sdd->seento = B_FALSE;
1334	sdd->prevsnap[0] = '\0';
1335	sdd->prevsnap_obj = 0;
1336	if (sdd->fromsnap == NULL || missingfrom)
1337		sdd->seenfrom = B_TRUE;
1338
1339	/*
1340	 * Iterate through all snapshots and process the ones we will be
1341	 * sending. If we only have a "from" and "to" snapshot to deal
1342	 * with, we can avoid iterating through all the other snapshots.
1343	 */
1344	if (sdd->doall || sdd->replicate || sdd->tosnap == NULL) {
1345		if (!sdd->replicate) {
1346			if (sdd->fromsnap != NULL) {
1347				min_txg = get_snap_txg(zhp->zfs_hdl,
1348				    zhp->zfs_name, sdd->fromsnap);
1349			}
1350			if (sdd->tosnap != NULL) {
1351				max_txg = get_snap_txg(zhp->zfs_hdl,
1352				    zhp->zfs_name, sdd->tosnap);
1353			}
1354		}
1355		rv = zfs_iter_snapshots_sorted_v2(zhp, 0, dump_snapshot, sdd,
1356		    min_txg, max_txg);
1357	} else {
1358		char snapname[MAXPATHLEN] = { 0 };
1359		zfs_handle_t *snap;
1360
1361		/* Dump fromsnap. */
1362		if (!sdd->seenfrom) {
1363			(void) snprintf(snapname, sizeof (snapname),
1364			    "%s@%s", zhp->zfs_name, sdd->fromsnap);
1365			snap = zfs_open(zhp->zfs_hdl, snapname,
1366			    ZFS_TYPE_SNAPSHOT);
1367			if (snap != NULL)
1368				rv = dump_snapshot(snap, sdd);
1369			else
1370				rv = errno;
1371		}
1372
1373		/* Dump tosnap. */
1374		if (rv == 0) {
1375			(void) snprintf(snapname, sizeof (snapname),
1376			    "%s@%s", zhp->zfs_name, sdd->tosnap);
1377			snap = zfs_open(zhp->zfs_hdl, snapname,
1378			    ZFS_TYPE_SNAPSHOT);
1379			if (snap != NULL)
1380				rv = dump_snapshot(snap, sdd);
1381			else
1382				rv = errno;
1383		}
1384	}
1385
1386	if (!sdd->seenfrom) {
1387		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1388		    "WARNING: could not send %s@%s:\n"
1389		    "incremental source (%s@%s) does not exist\n"),
1390		    zhp->zfs_name, sdd->tosnap,
1391		    zhp->zfs_name, sdd->fromsnap);
1392		sdd->err = B_TRUE;
1393	} else if (!sdd->seento) {
1394		if (sdd->fromsnap) {
1395			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1396			    "WARNING: could not send %s@%s:\n"
1397			    "incremental source (%s@%s) "
1398			    "is not earlier than it\n"),
1399			    zhp->zfs_name, sdd->tosnap,
1400			    zhp->zfs_name, sdd->fromsnap);
1401		} else {
1402			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1403			    "WARNING: "
1404			    "could not send %s@%s: does not exist\n"),
1405			    zhp->zfs_name, sdd->tosnap);
1406		}
1407		sdd->err = B_TRUE;
1408	}
1409
1410	return (rv);
1411}
1412
1413/*
1414 * Send all snapshots for all filesystems in sdd.
1415 */
1416static int
1417dump_filesystems(zfs_handle_t *rzhp, send_dump_data_t *sdd)
1418{
1419	nvpair_t *fspair;
1420	boolean_t needagain, progress;
1421
1422	if (!sdd->replicate)
1423		return (dump_filesystem(rzhp, sdd));
1424
1425	/* Mark the clone origin snapshots. */
1426	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1427	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1428		nvlist_t *nvfs;
1429		uint64_t origin_guid = 0;
1430
1431		nvfs = fnvpair_value_nvlist(fspair);
1432		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1433		if (origin_guid != 0) {
1434			const char *snapname;
1435			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1436			    origin_guid, &snapname);
1437			if (origin_nv != NULL) {
1438				nvlist_t *snapprops;
1439				snapprops = fnvlist_lookup_nvlist(origin_nv,
1440				    "snapprops");
1441				snapprops = fnvlist_lookup_nvlist(snapprops,
1442				    snapname);
1443				fnvlist_add_boolean(snapprops,
1444				    "is_clone_origin");
1445			}
1446		}
1447	}
1448again:
1449	needagain = progress = B_FALSE;
1450	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1451	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1452		nvlist_t *fslist, *parent_nv;
1453		const char *fsname;
1454		zfs_handle_t *zhp;
1455		int err;
1456		uint64_t origin_guid = 0;
1457		uint64_t parent_guid = 0;
1458
1459		fslist = fnvpair_value_nvlist(fspair);
1460		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1461			continue;
1462
1463		fsname = fnvlist_lookup_string(fslist, "name");
1464		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1465		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1466		    &parent_guid);
1467
1468		if (parent_guid != 0) {
1469			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1470			if (!nvlist_exists(parent_nv, "sent")) {
1471				/* Parent has not been sent; skip this one. */
1472				needagain = B_TRUE;
1473				continue;
1474			}
1475		}
1476
1477		if (origin_guid != 0) {
1478			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1479			    origin_guid, NULL);
1480			if (origin_nv != NULL &&
1481			    !nvlist_exists(origin_nv, "sent")) {
1482				/*
1483				 * Origin has not been sent yet;
1484				 * skip this clone.
1485				 */
1486				needagain = B_TRUE;
1487				continue;
1488			}
1489		}
1490
1491		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1492		if (zhp == NULL)
1493			return (-1);
1494		err = dump_filesystem(zhp, sdd);
1495		fnvlist_add_boolean(fslist, "sent");
1496		progress = B_TRUE;
1497		zfs_close(zhp);
1498		if (err)
1499			return (err);
1500	}
1501	if (needagain) {
1502		assert(progress);
1503		goto again;
1504	}
1505
1506	/* Clean out the sent flags in case we reuse this fss. */
1507	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1508	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1509		nvlist_t *fslist;
1510
1511		fslist = fnvpair_value_nvlist(fspair);
1512		(void) nvlist_remove_all(fslist, "sent");
1513	}
1514
1515	return (0);
1516}
1517
1518nvlist_t *
1519zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1520{
1521	unsigned int version;
1522	int nread, i;
1523	unsigned long long checksum, packed_len;
1524
1525	/*
1526	 * Decode token header, which is:
1527	 *   <token version>-<checksum of payload>-<uncompressed payload length>
1528	 * Note that the only supported token version is 1.
1529	 */
1530	nread = sscanf(token, "%u-%llx-%llx-",
1531	    &version, &checksum, &packed_len);
1532	if (nread != 3) {
1533		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1534		    "resume token is corrupt (invalid format)"));
1535		return (NULL);
1536	}
1537
1538	if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1539		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1540		    "resume token is corrupt (invalid version %u)"),
1541		    version);
1542		return (NULL);
1543	}
1544
1545	/* Convert hexadecimal representation to binary. */
1546	token = strrchr(token, '-') + 1;
1547	int len = strlen(token) / 2;
1548	unsigned char *compressed = zfs_alloc(hdl, len);
1549	for (i = 0; i < len; i++) {
1550		nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1551		if (nread != 1) {
1552			free(compressed);
1553			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1554			    "resume token is corrupt "
1555			    "(payload is not hex-encoded)"));
1556			return (NULL);
1557		}
1558	}
1559
1560	/* Verify checksum. */
1561	zio_cksum_t cksum;
1562	fletcher_4_native_varsize(compressed, len, &cksum);
1563	if (cksum.zc_word[0] != checksum) {
1564		free(compressed);
1565		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1566		    "resume token is corrupt (incorrect checksum)"));
1567		return (NULL);
1568	}
1569
1570	/* Uncompress. */
1571	void *packed = zfs_alloc(hdl, packed_len);
1572	uLongf packed_len_long = packed_len;
1573	if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1574	    packed_len_long != packed_len) {
1575		free(packed);
1576		free(compressed);
1577		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1578		    "resume token is corrupt (decompression failed)"));
1579		return (NULL);
1580	}
1581
1582	/* Unpack nvlist. */
1583	nvlist_t *nv;
1584	int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1585	free(packed);
1586	free(compressed);
1587	if (error != 0) {
1588		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1589		    "resume token is corrupt (nvlist_unpack failed)"));
1590		return (NULL);
1591	}
1592	return (nv);
1593}
1594
1595static enum lzc_send_flags
1596lzc_flags_from_sendflags(const sendflags_t *flags)
1597{
1598	enum lzc_send_flags lzc_flags = 0;
1599
1600	if (flags->largeblock)
1601		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1602	if (flags->embed_data)
1603		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1604	if (flags->compress)
1605		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1606	if (flags->raw)
1607		lzc_flags |= LZC_SEND_FLAG_RAW;
1608	if (flags->saved)
1609		lzc_flags |= LZC_SEND_FLAG_SAVED;
1610
1611	return (lzc_flags);
1612}
1613
1614static int
1615estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
1616    uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes,
1617    const char *redactbook, char *errbuf, uint64_t *sizep)
1618{
1619	uint64_t size;
1620	FILE *fout = flags->dryrun ? stdout : stderr;
1621	progress_arg_t pa = { 0 };
1622	int err = 0;
1623	pthread_t ptid;
1624	sigset_t oldmask;
1625
1626	{
1627		pa.pa_zhp = zhp;
1628		pa.pa_fd = fd;
1629		pa.pa_parsable = flags->parsable;
1630		pa.pa_estimate = B_TRUE;
1631		pa.pa_verbosity = flags->verbosity;
1632
1633		err = pthread_create(&ptid, NULL,
1634		    send_progress_thread, &pa);
1635		if (err != 0) {
1636			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(errno));
1637			return (zfs_error(zhp->zfs_hdl,
1638			    EZFS_THREADCREATEFAILED, errbuf));
1639		}
1640		SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1641	}
1642
1643	err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
1644	    lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes,
1645	    redactbook, fd, &size);
1646	*sizep = size;
1647
1648	if (send_progress_thread_exit(zhp->zfs_hdl, ptid, &oldmask))
1649		return (-1);
1650
1651	if (!flags->progress && !flags->parsable)
1652		return (err);
1653
1654	if (err != 0) {
1655		zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
1656		return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
1657		    errbuf));
1658	}
1659	send_print_verbose(fout, zhp->zfs_name, from, size,
1660	    flags->parsable);
1661
1662	if (flags->parsable) {
1663		(void) fprintf(fout, "size\t%llu\n", (longlong_t)size);
1664	} else {
1665		char buf[16];
1666		zfs_nicenum(size, buf, sizeof (buf));
1667		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1668		    "total estimated size is %s\n"), buf);
1669	}
1670	return (0);
1671}
1672
1673static boolean_t
1674redact_snaps_contains(const uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
1675{
1676	for (int i = 0; i < num_snaps; i++) {
1677		if (snaps[i] == guid)
1678			return (B_TRUE);
1679	}
1680	return (B_FALSE);
1681}
1682
1683static boolean_t
1684redact_snaps_equal(const uint64_t *snaps1, uint64_t num_snaps1,
1685    const uint64_t *snaps2, uint64_t num_snaps2)
1686{
1687	if (num_snaps1 != num_snaps2)
1688		return (B_FALSE);
1689	for (int i = 0; i < num_snaps1; i++) {
1690		if (!redact_snaps_contains(snaps2, num_snaps2, snaps1[i]))
1691			return (B_FALSE);
1692	}
1693	return (B_TRUE);
1694}
1695
1696static int
1697get_bookmarks(const char *path, nvlist_t **bmarksp)
1698{
1699	nvlist_t *props = fnvlist_alloc();
1700	int error;
1701
1702	fnvlist_add_boolean(props, "redact_complete");
1703	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1704	error = lzc_get_bookmarks(path, props, bmarksp);
1705	fnvlist_free(props);
1706	return (error);
1707}
1708
1709static nvpair_t *
1710find_redact_pair(nvlist_t *bmarks, const uint64_t *redact_snap_guids,
1711    int num_redact_snaps)
1712{
1713	nvpair_t *pair;
1714
1715	for (pair = nvlist_next_nvpair(bmarks, NULL); pair;
1716	    pair = nvlist_next_nvpair(bmarks, pair)) {
1717
1718		nvlist_t *bmark = fnvpair_value_nvlist(pair);
1719		nvlist_t *vallist = fnvlist_lookup_nvlist(bmark,
1720		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1721		uint_t len = 0;
1722		uint64_t *bmarksnaps = fnvlist_lookup_uint64_array(vallist,
1723		    ZPROP_VALUE, &len);
1724		if (redact_snaps_equal(redact_snap_guids,
1725		    num_redact_snaps, bmarksnaps, len)) {
1726			break;
1727		}
1728	}
1729	return (pair);
1730}
1731
1732static boolean_t
1733get_redact_complete(nvpair_t *pair)
1734{
1735	nvlist_t *bmark = fnvpair_value_nvlist(pair);
1736	nvlist_t *vallist = fnvlist_lookup_nvlist(bmark, "redact_complete");
1737	boolean_t complete = fnvlist_lookup_boolean_value(vallist,
1738	    ZPROP_VALUE);
1739
1740	return (complete);
1741}
1742
1743/*
1744 * Check that the list of redaction snapshots in the bookmark matches the send
1745 * we're resuming, and return whether or not it's complete.
1746 *
1747 * Note that the caller needs to free the contents of *bookname with free() if
1748 * this function returns successfully.
1749 */
1750static int
1751find_redact_book(libzfs_handle_t *hdl, const char *path,
1752    const uint64_t *redact_snap_guids, int num_redact_snaps,
1753    char **bookname)
1754{
1755	char errbuf[ERRBUFLEN];
1756	nvlist_t *bmarks;
1757
1758	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1759	    "cannot resume send"));
1760
1761	int error = get_bookmarks(path, &bmarks);
1762	if (error != 0) {
1763		if (error == ESRCH) {
1764			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1765			    "nonexistent redaction bookmark provided"));
1766		} else if (error == ENOENT) {
1767			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1768			    "dataset to be sent no longer exists"));
1769		} else {
1770			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1771			    "unknown error: %s"), zfs_strerror(error));
1772		}
1773		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1774	}
1775	nvpair_t *pair = find_redact_pair(bmarks, redact_snap_guids,
1776	    num_redact_snaps);
1777	if (pair == NULL)  {
1778		fnvlist_free(bmarks);
1779		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1780		    "no appropriate redaction bookmark exists"));
1781		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1782	}
1783	boolean_t complete = get_redact_complete(pair);
1784	if (!complete) {
1785		fnvlist_free(bmarks);
1786		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1787		    "incomplete redaction bookmark provided"));
1788		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1789	}
1790	*bookname = strndup(nvpair_name(pair), ZFS_MAX_DATASET_NAME_LEN);
1791	ASSERT3P(*bookname, !=, NULL);
1792	fnvlist_free(bmarks);
1793	return (0);
1794}
1795
1796static enum lzc_send_flags
1797lzc_flags_from_resume_nvl(nvlist_t *resume_nvl)
1798{
1799	enum lzc_send_flags lzc_flags = 0;
1800
1801	if (nvlist_exists(resume_nvl, "largeblockok"))
1802		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1803	if (nvlist_exists(resume_nvl, "embedok"))
1804		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1805	if (nvlist_exists(resume_nvl, "compressok"))
1806		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1807	if (nvlist_exists(resume_nvl, "rawok"))
1808		lzc_flags |= LZC_SEND_FLAG_RAW;
1809	if (nvlist_exists(resume_nvl, "savedok"))
1810		lzc_flags |= LZC_SEND_FLAG_SAVED;
1811
1812	return (lzc_flags);
1813}
1814
1815static int
1816zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
1817    int outfd, nvlist_t *resume_nvl)
1818{
1819	char errbuf[ERRBUFLEN];
1820	const char *toname;
1821	const char *fromname = NULL;
1822	uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1823	zfs_handle_t *zhp;
1824	int error = 0;
1825	char name[ZFS_MAX_DATASET_NAME_LEN];
1826	FILE *fout = (flags->verbosity > 0 && flags->dryrun) ? stdout : stderr;
1827	uint64_t *redact_snap_guids = NULL;
1828	int num_redact_snaps = 0;
1829	char *redact_book = NULL;
1830	uint64_t size = 0;
1831
1832	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1833	    "cannot resume send"));
1834
1835	if (flags->verbosity != 0) {
1836		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1837		    "resume token contents:\n"));
1838		nvlist_print(fout, resume_nvl);
1839	}
1840
1841	if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1842	    nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1843	    nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1844	    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1845	    nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1846		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1847		    "resume token is corrupt"));
1848		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1849	}
1850	fromguid = 0;
1851	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1852
1853	if (flags->saved) {
1854		(void) strlcpy(name, toname, sizeof (name));
1855	} else {
1856		error = guid_to_name(hdl, toname, toguid, B_FALSE, name);
1857		if (error != 0) {
1858			if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1859				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1860				    "'%s' is no longer the same snapshot "
1861				    "used in the initial send"), toname);
1862			} else {
1863				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1864				    "'%s' used in the initial send no "
1865				    "longer exists"), toname);
1866			}
1867			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1868		}
1869	}
1870
1871	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1872	if (zhp == NULL) {
1873		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1874		    "unable to access '%s'"), name);
1875		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1876	}
1877
1878	if (nvlist_lookup_uint64_array(resume_nvl, "book_redact_snaps",
1879	    &redact_snap_guids, (uint_t *)&num_redact_snaps) != 0) {
1880		num_redact_snaps = -1;
1881	}
1882
1883	if (fromguid != 0) {
1884		if (guid_to_name_redact_snaps(hdl, toname, fromguid, B_TRUE,
1885		    redact_snap_guids, num_redact_snaps, name) != 0) {
1886			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1887			    "incremental source %#llx no longer exists"),
1888			    (longlong_t)fromguid);
1889			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1890		}
1891		fromname = name;
1892	}
1893
1894	redact_snap_guids = NULL;
1895
1896	if (nvlist_lookup_uint64_array(resume_nvl,
1897	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &redact_snap_guids,
1898	    (uint_t *)&num_redact_snaps) == 0) {
1899		char path[ZFS_MAX_DATASET_NAME_LEN];
1900
1901		(void) strlcpy(path, toname, sizeof (path));
1902		char *at = strchr(path, '@');
1903		ASSERT3P(at, !=, NULL);
1904
1905		*at = '\0';
1906
1907		if ((error = find_redact_book(hdl, path, redact_snap_guids,
1908		    num_redact_snaps, &redact_book)) != 0) {
1909			return (error);
1910		}
1911	}
1912
1913	enum lzc_send_flags lzc_flags = lzc_flags_from_sendflags(flags) |
1914	    lzc_flags_from_resume_nvl(resume_nvl);
1915
1916	if (flags->verbosity != 0 || flags->progressastitle) {
1917		/*
1918		 * Some of these may have come from the resume token, set them
1919		 * here for size estimate purposes.
1920		 */
1921		sendflags_t tmpflags = *flags;
1922		if (lzc_flags & LZC_SEND_FLAG_LARGE_BLOCK)
1923			tmpflags.largeblock = B_TRUE;
1924		if (lzc_flags & LZC_SEND_FLAG_COMPRESS)
1925			tmpflags.compress = B_TRUE;
1926		if (lzc_flags & LZC_SEND_FLAG_EMBED_DATA)
1927			tmpflags.embed_data = B_TRUE;
1928		if (lzc_flags & LZC_SEND_FLAG_RAW)
1929			tmpflags.raw = B_TRUE;
1930		if (lzc_flags & LZC_SEND_FLAG_SAVED)
1931			tmpflags.saved = B_TRUE;
1932		error = estimate_size(zhp, fromname, outfd, &tmpflags,
1933		    resumeobj, resumeoff, bytes, redact_book, errbuf, &size);
1934	}
1935
1936	if (!flags->dryrun) {
1937		progress_arg_t pa = { 0 };
1938		pthread_t tid;
1939		sigset_t oldmask;
1940		/*
1941		 * If progress reporting is requested, spawn a new thread to
1942		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1943		 */
1944		{
1945			pa.pa_zhp = zhp;
1946			pa.pa_fd = outfd;
1947			pa.pa_parsable = flags->parsable;
1948			pa.pa_estimate = B_FALSE;
1949			pa.pa_verbosity = flags->verbosity;
1950			pa.pa_size = size;
1951			pa.pa_astitle = flags->progressastitle;
1952			pa.pa_progress = flags->progress;
1953
1954			error = pthread_create(&tid, NULL,
1955			    send_progress_thread, &pa);
1956			if (error != 0) {
1957				if (redact_book != NULL)
1958					free(redact_book);
1959				zfs_close(zhp);
1960				return (error);
1961			}
1962			SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1963		}
1964
1965		error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
1966		    lzc_flags, resumeobj, resumeoff, redact_book);
1967		if (redact_book != NULL)
1968			free(redact_book);
1969
1970		if (send_progress_thread_exit(hdl, tid, &oldmask)) {
1971			zfs_close(zhp);
1972			return (-1);
1973		}
1974
1975		char errbuf[ERRBUFLEN];
1976		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1977		    "warning: cannot send '%s'"), zhp->zfs_name);
1978
1979		zfs_close(zhp);
1980
1981		switch (error) {
1982		case 0:
1983			return (0);
1984		case EACCES:
1985			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1986			    "source key must be loaded"));
1987			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1988		case ESRCH:
1989			if (lzc_exists(zhp->zfs_name)) {
1990				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1991				    "incremental source could not be found"));
1992			}
1993			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1994
1995		case EXDEV:
1996		case ENOENT:
1997		case EDQUOT:
1998		case EFBIG:
1999		case EIO:
2000		case ENOLINK:
2001		case ENOSPC:
2002		case ENOSTR:
2003		case ENXIO:
2004		case EPIPE:
2005		case ERANGE:
2006		case EFAULT:
2007		case EROFS:
2008			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
2009			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2010
2011		default:
2012			return (zfs_standard_error(hdl, errno, errbuf));
2013		}
2014	} else {
2015		if (redact_book != NULL)
2016			free(redact_book);
2017	}
2018
2019	zfs_close(zhp);
2020
2021	return (error);
2022}
2023
/*
 * Arguments carried through lzc_send_wrapper() to zfs_send_resume_impl_cb(),
 * which forwards them to zfs_send_resume_impl_cb_impl().
 */
struct zfs_send_resume_impl {
	libzfs_handle_t *hdl;		/* library handle */
	sendflags_t *flags;		/* caller's send flags */
	nvlist_t *resume_nvl;		/* decoded resume token */
};
2029
2030static int
2031zfs_send_resume_impl_cb(int outfd, void *arg)
2032{
2033	struct zfs_send_resume_impl *zsri = arg;
2034	return (zfs_send_resume_impl_cb_impl(zsri->hdl, zsri->flags, outfd,
2035	    zsri->resume_nvl));
2036}
2037
2038static int
2039zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
2040    nvlist_t *resume_nvl)
2041{
2042	struct zfs_send_resume_impl zsri = {
2043		.hdl = hdl,
2044		.flags = flags,
2045		.resume_nvl = resume_nvl,
2046	};
2047	return (lzc_send_wrapper(zfs_send_resume_impl_cb, outfd, &zsri));
2048}
2049
2050int
2051zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
2052    const char *resume_token)
2053{
2054	int ret;
2055	char errbuf[ERRBUFLEN];
2056	nvlist_t *resume_nvl;
2057
2058	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2059	    "cannot resume send"));
2060
2061	resume_nvl = zfs_send_resume_token_to_nvlist(hdl, resume_token);
2062	if (resume_nvl == NULL) {
2063		/*
2064		 * zfs_error_aux has already been set by
2065		 * zfs_send_resume_token_to_nvlist()
2066		 */
2067		return (zfs_error(hdl, EZFS_FAULT, errbuf));
2068	}
2069
2070	ret = zfs_send_resume_impl(hdl, flags, outfd, resume_nvl);
2071	fnvlist_free(resume_nvl);
2072
2073	return (ret);
2074}
2075
2076int
2077zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
2078    const char *resume_token)
2079{
2080	int ret;
2081	libzfs_handle_t *hdl = zhp->zfs_hdl;
2082	nvlist_t *saved_nvl = NULL, *resume_nvl = NULL;
2083	uint64_t saved_guid = 0, resume_guid = 0;
2084	uint64_t obj = 0, off = 0, bytes = 0;
2085	char token_buf[ZFS_MAXPROPLEN];
2086	char errbuf[ERRBUFLEN];
2087
2088	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2089	    "saved send failed"));
2090
2091	ret = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
2092	    token_buf, sizeof (token_buf), NULL, NULL, 0, B_TRUE);
2093	if (ret != 0)
2094		goto out;
2095
2096	saved_nvl = zfs_send_resume_token_to_nvlist(hdl, token_buf);
2097	if (saved_nvl == NULL) {
2098		/*
2099		 * zfs_error_aux has already been set by
2100		 * zfs_send_resume_token_to_nvlist()
2101		 */
2102		ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2103		goto out;
2104	}
2105
2106	/*
2107	 * If a resume token is provided we use the object and offset
2108	 * from that instead of the default, which starts from the
2109	 * beginning.
2110	 */
2111	if (resume_token != NULL) {
2112		resume_nvl = zfs_send_resume_token_to_nvlist(hdl,
2113		    resume_token);
2114		if (resume_nvl == NULL) {
2115			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2116			goto out;
2117		}
2118
2119		if (nvlist_lookup_uint64(resume_nvl, "object", &obj) != 0 ||
2120		    nvlist_lookup_uint64(resume_nvl, "offset", &off) != 0 ||
2121		    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
2122		    nvlist_lookup_uint64(resume_nvl, "toguid",
2123		    &resume_guid) != 0) {
2124			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2125			    "provided resume token is corrupt"));
2126			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2127			goto out;
2128		}
2129
2130		if (nvlist_lookup_uint64(saved_nvl, "toguid",
2131		    &saved_guid)) {
2132			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2133			    "dataset's resume token is corrupt"));
2134			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2135			goto out;
2136		}
2137
2138		if (resume_guid != saved_guid) {
2139			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2140			    "provided resume token does not match dataset"));
2141			ret = zfs_error(hdl, EZFS_BADBACKUP, errbuf);
2142			goto out;
2143		}
2144	}
2145
2146	(void) nvlist_remove_all(saved_nvl, "object");
2147	fnvlist_add_uint64(saved_nvl, "object", obj);
2148
2149	(void) nvlist_remove_all(saved_nvl, "offset");
2150	fnvlist_add_uint64(saved_nvl, "offset", off);
2151
2152	(void) nvlist_remove_all(saved_nvl, "bytes");
2153	fnvlist_add_uint64(saved_nvl, "bytes", bytes);
2154
2155	(void) nvlist_remove_all(saved_nvl, "toname");
2156	fnvlist_add_string(saved_nvl, "toname", zhp->zfs_name);
2157
2158	ret = zfs_send_resume_impl(hdl, flags, outfd, saved_nvl);
2159
2160out:
2161	fnvlist_free(saved_nvl);
2162	fnvlist_free(resume_nvl);
2163	return (ret);
2164}
2165
2166/*
2167 * This function informs the target system that the recursive send is complete.
2168 * The record is also expected in the case of a send -p.
2169 */
2170static int
2171send_conclusion_record(int fd, zio_cksum_t *zc)
2172{
2173	dmu_replay_record_t drr;
2174	memset(&drr, 0, sizeof (dmu_replay_record_t));
2175	drr.drr_type = DRR_END;
2176	if (zc != NULL)
2177		drr.drr_u.drr_end.drr_checksum = *zc;
2178	if (write(fd, &drr, sizeof (drr)) == -1) {
2179		return (errno);
2180	}
2181	return (0);
2182}
2183
2184/*
2185 * This function is responsible for sending the records that contain the
2186 * necessary information for the target system's libzfs to be able to set the
2187 * properties of the filesystem being received, or to be able to prepare for
2188 * a recursive receive.
2189 *
2190 * The "zhp" argument is the handle of the snapshot we are sending
2191 * (the "tosnap").  The "from" argument is the short snapshot name (the part
2192 * after the @) of the incremental source.
2193 */
2194static int
2195send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
2196    boolean_t gather_props, boolean_t recursive, boolean_t verbose,
2197    boolean_t dryrun, boolean_t raw, boolean_t replicate, boolean_t skipmissing,
2198    boolean_t backup, boolean_t holds, boolean_t props, boolean_t doall,
2199    nvlist_t **fssp, avl_tree_t **fsavlp)
2200{
2201	int err = 0;
2202	char *packbuf = NULL;
2203	size_t buflen = 0;
2204	zio_cksum_t zc = { {0} };
2205	int featureflags = 0;
2206	/* name of filesystem/volume that contains snapshot we are sending */
2207	char tofs[ZFS_MAX_DATASET_NAME_LEN];
2208	/* short name of snap we are sending */
2209	const char *tosnap = "";
2210
2211	char errbuf[ERRBUFLEN];
2212	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2213	    "warning: cannot send '%s'"), zhp->zfs_name);
2214	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
2215	    ZFS_PROP_VERSION) >= ZPL_VERSION_SA) {
2216		featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
2217	}
2218
2219	if (holds)
2220		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
2221
2222	(void) strlcpy(tofs, zhp->zfs_name, ZFS_MAX_DATASET_NAME_LEN);
2223	char *at = strchr(tofs, '@');
2224	if (at != NULL) {
2225		*at = '\0';
2226		tosnap = at + 1;
2227	}
2228
2229	if (gather_props) {
2230		nvlist_t *hdrnv = fnvlist_alloc();
2231		nvlist_t *fss = NULL;
2232
2233		if (from != NULL)
2234			fnvlist_add_string(hdrnv, "fromsnap", from);
2235		fnvlist_add_string(hdrnv, "tosnap", tosnap);
2236		if (!recursive)
2237			fnvlist_add_boolean(hdrnv, "not_recursive");
2238
2239		if (raw) {
2240			fnvlist_add_boolean(hdrnv, "raw");
2241		}
2242
2243		if (gather_nvlist(zhp->zfs_hdl, tofs,
2244		    from, tosnap, recursive, raw, doall, replicate, skipmissing,
2245		    verbose, backup, holds, props, &fss, fsavlp) != 0) {
2246			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2247			    errbuf));
2248		}
2249		/*
2250		 * Do not allow the size of the properties list to exceed
2251		 * the limit
2252		 */
2253		if ((fnvlist_size(fss) + fnvlist_size(hdrnv)) >
2254		    zhp->zfs_hdl->libzfs_max_nvlist) {
2255			(void) snprintf(errbuf, sizeof (errbuf),
2256			    dgettext(TEXT_DOMAIN, "warning: cannot send '%s': "
2257			    "the size of the list of snapshots and properties "
2258			    "is too large to be received successfully.\n"
2259			    "Select a smaller number of snapshots to send.\n"),
2260			    zhp->zfs_name);
2261			return (zfs_error(zhp->zfs_hdl, EZFS_NOSPC,
2262			    errbuf));
2263		}
2264		fnvlist_add_nvlist(hdrnv, "fss", fss);
2265		VERIFY0(nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR,
2266		    0));
2267		if (fssp != NULL) {
2268			*fssp = fss;
2269		} else {
2270			fnvlist_free(fss);
2271		}
2272		fnvlist_free(hdrnv);
2273	}
2274
2275	if (!dryrun) {
2276		dmu_replay_record_t drr;
2277		memset(&drr, 0, sizeof (dmu_replay_record_t));
2278		/* write first begin record */
2279		drr.drr_type = DRR_BEGIN;
2280		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
2281		DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
2282		    drr_versioninfo, DMU_COMPOUNDSTREAM);
2283		DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
2284		    drr_versioninfo, featureflags);
2285		if (snprintf(drr.drr_u.drr_begin.drr_toname,
2286		    sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", tofs,
2287		    tosnap) >= sizeof (drr.drr_u.drr_begin.drr_toname)) {
2288			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2289			    errbuf));
2290		}
2291		drr.drr_payloadlen = buflen;
2292
2293		err = dump_record(&drr, packbuf, buflen, &zc, fd);
2294		free(packbuf);
2295		if (err != 0) {
2296			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
2297			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2298			    errbuf));
2299		}
2300		err = send_conclusion_record(fd, &zc);
2301		if (err != 0) {
2302			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
2303			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2304			    errbuf));
2305		}
2306	}
2307	return (0);
2308}
2309
2310/*
2311 * Generate a send stream.  The "zhp" argument is the filesystem/volume
2312 * that contains the snapshot to send.  The "fromsnap" argument is the
2313 * short name (the part after the '@') of the snapshot that is the
2314 * incremental source to send from (if non-NULL).  The "tosnap" argument
2315 * is the short name of the snapshot to send.
2316 *
2317 * The content of the send stream is the snapshot identified by
2318 * 'tosnap'.  Incremental streams are requested in two ways:
2319 *     - from the snapshot identified by "fromsnap" (if non-null) or
2320 *     - from the origin of the dataset identified by zhp, which must
2321 *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
2322 *	 is TRUE.
2323 *
2324 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
2325 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
2326 * if "replicate" is set.  If "doall" is set, dump all the intermediate
2327 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
2328 * case too. If "props" is set, send properties.
2329 *
2330 * Pre-wrapped (cf. lzc_send_wrapper()).
2331 */
2332static int
2333zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2334    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2335    void *cb_arg, nvlist_t **debugnvp)
2336{
2337	char errbuf[ERRBUFLEN];
2338	send_dump_data_t sdd = { 0 };
2339	int err = 0;
2340	nvlist_t *fss = NULL;
2341	avl_tree_t *fsavl = NULL;
2342	static uint64_t holdseq;
2343	int spa_version;
2344	FILE *fout;
2345
2346	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2347	    "cannot send '%s'"), zhp->zfs_name);
2348
2349	if (fromsnap && fromsnap[0] == '\0') {
2350		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2351		    "zero-length incremental source"));
2352		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
2353	}
2354
2355	if (fromsnap) {
2356		char full_fromsnap_name[ZFS_MAX_DATASET_NAME_LEN];
2357		if (snprintf(full_fromsnap_name, sizeof (full_fromsnap_name),
2358		    "%s@%s", zhp->zfs_name, fromsnap) >=
2359		    sizeof (full_fromsnap_name)) {
2360			err = EINVAL;
2361			goto stderr_out;
2362		}
2363		zfs_handle_t *fromsnapn = zfs_open(zhp->zfs_hdl,
2364		    full_fromsnap_name, ZFS_TYPE_SNAPSHOT);
2365		if (fromsnapn == NULL) {
2366			err = -1;
2367			goto err_out;
2368		}
2369		zfs_close(fromsnapn);
2370	}
2371
2372	if (flags->replicate || flags->doall || flags->props ||
2373	    flags->holds || flags->backup) {
2374		char full_tosnap_name[ZFS_MAX_DATASET_NAME_LEN];
2375		if (snprintf(full_tosnap_name, sizeof (full_tosnap_name),
2376		    "%s@%s", zhp->zfs_name, tosnap) >=
2377		    sizeof (full_tosnap_name)) {
2378			err = EINVAL;
2379			goto stderr_out;
2380		}
2381		zfs_handle_t *tosnap = zfs_open(zhp->zfs_hdl,
2382		    full_tosnap_name, ZFS_TYPE_SNAPSHOT);
2383		if (tosnap == NULL) {
2384			err = -1;
2385			goto err_out;
2386		}
2387		err = send_prelim_records(tosnap, fromsnap, outfd,
2388		    flags->replicate || flags->props || flags->holds,
2389		    flags->replicate, flags->verbosity > 0, flags->dryrun,
2390		    flags->raw, flags->replicate, flags->skipmissing,
2391		    flags->backup, flags->holds, flags->props, flags->doall,
2392		    &fss, &fsavl);
2393		zfs_close(tosnap);
2394		if (err != 0)
2395			goto err_out;
2396	}
2397
2398	/* dump each stream */
2399	sdd.fromsnap = fromsnap;
2400	sdd.tosnap = tosnap;
2401	sdd.outfd = outfd;
2402	sdd.replicate = flags->replicate;
2403	sdd.doall = flags->doall;
2404	sdd.fromorigin = flags->fromorigin;
2405	sdd.fss = fss;
2406	sdd.fsavl = fsavl;
2407	sdd.verbosity = flags->verbosity;
2408	sdd.parsable = flags->parsable;
2409	sdd.progress = flags->progress;
2410	sdd.progressastitle = flags->progressastitle;
2411	sdd.dryrun = flags->dryrun;
2412	sdd.large_block = flags->largeblock;
2413	sdd.embed_data = flags->embed_data;
2414	sdd.compress = flags->compress;
2415	sdd.raw = flags->raw;
2416	sdd.holds = flags->holds;
2417	sdd.filter_cb = filter_func;
2418	sdd.filter_cb_arg = cb_arg;
2419	if (debugnvp)
2420		sdd.debugnv = *debugnvp;
2421	if (sdd.verbosity != 0 && sdd.dryrun)
2422		sdd.std_out = B_TRUE;
2423	fout = sdd.std_out ? stdout : stderr;
2424
2425	/*
2426	 * Some flags require that we place user holds on the datasets that are
2427	 * being sent so they don't get destroyed during the send. We can skip
2428	 * this step if the pool is imported read-only since the datasets cannot
2429	 * be destroyed.
2430	 */
2431	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
2432	    ZPOOL_PROP_READONLY, NULL) &&
2433	    zfs_spa_version(zhp, &spa_version) == 0 &&
2434	    spa_version >= SPA_VERSION_USERREFS &&
2435	    (flags->doall || flags->replicate)) {
2436		++holdseq;
2437		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
2438		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
2439		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
2440		if (sdd.cleanup_fd < 0) {
2441			err = errno;
2442			goto stderr_out;
2443		}
2444		sdd.snapholds = fnvlist_alloc();
2445	} else {
2446		sdd.cleanup_fd = -1;
2447		sdd.snapholds = NULL;
2448	}
2449
2450	if (flags->verbosity != 0 || sdd.snapholds != NULL) {
2451		/*
2452		 * Do a verbose no-op dry run to get all the verbose output
2453		 * or to gather snapshot hold's before generating any data,
2454		 * then do a non-verbose real run to generate the streams.
2455		 */
2456		sdd.dryrun = B_TRUE;
2457		err = dump_filesystems(zhp, &sdd);
2458
2459		if (err != 0)
2460			goto stderr_out;
2461
2462		if (flags->verbosity != 0) {
2463			if (flags->parsable) {
2464				(void) fprintf(fout, "size\t%llu\n",
2465				    (longlong_t)sdd.size);
2466			} else {
2467				char buf[16];
2468				zfs_nicebytes(sdd.size, buf, sizeof (buf));
2469				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
2470				    "total estimated size is %s\n"), buf);
2471			}
2472		}
2473
2474		/* Ensure no snaps found is treated as an error. */
2475		if (!sdd.seento) {
2476			err = ENOENT;
2477			goto err_out;
2478		}
2479
2480		/* Skip the second run if dryrun was requested. */
2481		if (flags->dryrun)
2482			goto err_out;
2483
2484		if (sdd.snapholds != NULL) {
2485			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
2486			if (err != 0)
2487				goto stderr_out;
2488
2489			fnvlist_free(sdd.snapholds);
2490			sdd.snapholds = NULL;
2491		}
2492
2493		sdd.dryrun = B_FALSE;
2494		sdd.verbosity = 0;
2495	}
2496
2497	err = dump_filesystems(zhp, &sdd);
2498	fsavl_destroy(fsavl);
2499	fnvlist_free(fss);
2500
2501	/* Ensure no snaps found is treated as an error. */
2502	if (err == 0 && !sdd.seento)
2503		err = ENOENT;
2504
2505	if (sdd.cleanup_fd != -1) {
2506		VERIFY(0 == close(sdd.cleanup_fd));
2507		sdd.cleanup_fd = -1;
2508	}
2509
2510	if (!flags->dryrun && (flags->replicate || flags->doall ||
2511	    flags->props || flags->backup || flags->holds)) {
2512		/*
2513		 * write final end record.  NB: want to do this even if
2514		 * there was some error, because it might not be totally
2515		 * failed.
2516		 */
2517		int err2 = send_conclusion_record(outfd, NULL);
2518		if (err2 != 0)
2519			return (zfs_standard_error(zhp->zfs_hdl, err2, errbuf));
2520	}
2521
2522	return (err || sdd.err);
2523
2524stderr_out:
2525	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2526err_out:
2527	fsavl_destroy(fsavl);
2528	fnvlist_free(fss);
2529	fnvlist_free(sdd.snapholds);
2530
2531	if (sdd.cleanup_fd != -1)
2532		VERIFY(0 == close(sdd.cleanup_fd));
2533	return (err);
2534}
2535
/*
 * Argument bundle passed from zfs_send() to zfs_send_cb() through the
 * opaque pointer of lzc_send_wrapper().  Each member holds the zfs_send()
 * parameter of the same name; the output descriptor is supplied separately
 * to the callback.
 */
struct zfs_send {
	zfs_handle_t *zhp;
	const char *fromsnap;
	const char *tosnap;
	sendflags_t *flags;
	snapfilter_cb_t *filter_func;
	void *cb_arg;
	nvlist_t **debugnvp;
};
2545
2546static int
2547zfs_send_cb(int outfd, void *arg)
2548{
2549	struct zfs_send *zs = arg;
2550	return (zfs_send_cb_impl(zs->zhp, zs->fromsnap, zs->tosnap, zs->flags,
2551	    outfd, zs->filter_func, zs->cb_arg, zs->debugnvp));
2552}
2553
2554int
2555zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2556    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2557    void *cb_arg, nvlist_t **debugnvp)
2558{
2559	struct zfs_send arg = {
2560		.zhp = zhp,
2561		.fromsnap = fromsnap,
2562		.tosnap = tosnap,
2563		.flags = flags,
2564		.filter_func = filter_func,
2565		.cb_arg = cb_arg,
2566		.debugnvp = debugnvp,
2567	};
2568	return (lzc_send_wrapper(zfs_send_cb, outfd, &arg));
2569}
2570
2571
2572static zfs_handle_t *
2573name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
2574{
2575	char dirname[ZFS_MAX_DATASET_NAME_LEN];
2576	(void) strlcpy(dirname, snapname, ZFS_MAX_DATASET_NAME_LEN);
2577	char *c = strchr(dirname, '@');
2578	if (c != NULL)
2579		*c = '\0';
2580	return (zfs_open(hdl, dirname, ZFS_TYPE_DATASET));
2581}
2582
2583/*
2584 * Returns B_TRUE if earlier is an earlier snapshot in later's timeline; either
2585 * an earlier snapshot in the same filesystem, or a snapshot before later's
2586 * origin, or it's origin's origin, etc.
2587 */
2588static boolean_t
2589snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
2590{
2591	boolean_t ret;
2592	uint64_t later_txg =
2593	    (later->zfs_type == ZFS_TYPE_FILESYSTEM ||
2594	    later->zfs_type == ZFS_TYPE_VOLUME ?
2595	    UINT64_MAX : zfs_prop_get_int(later, ZFS_PROP_CREATETXG));
2596	uint64_t earlier_txg = zfs_prop_get_int(earlier, ZFS_PROP_CREATETXG);
2597
2598	if (earlier_txg >= later_txg)
2599		return (B_FALSE);
2600
2601	zfs_handle_t *earlier_dir = name_to_dir_handle(earlier->zfs_hdl,
2602	    earlier->zfs_name);
2603	zfs_handle_t *later_dir = name_to_dir_handle(later->zfs_hdl,
2604	    later->zfs_name);
2605
2606	if (strcmp(earlier_dir->zfs_name, later_dir->zfs_name) == 0) {
2607		zfs_close(earlier_dir);
2608		zfs_close(later_dir);
2609		return (B_TRUE);
2610	}
2611
2612	char clonename[ZFS_MAX_DATASET_NAME_LEN];
2613	if (zfs_prop_get(later_dir, ZFS_PROP_ORIGIN, clonename,
2614	    ZFS_MAX_DATASET_NAME_LEN, NULL, NULL, 0, B_TRUE) != 0) {
2615		zfs_close(earlier_dir);
2616		zfs_close(later_dir);
2617		return (B_FALSE);
2618	}
2619
2620	zfs_handle_t *origin = zfs_open(earlier->zfs_hdl, clonename,
2621	    ZFS_TYPE_DATASET);
2622	uint64_t origin_txg = zfs_prop_get_int(origin, ZFS_PROP_CREATETXG);
2623
2624	/*
2625	 * If "earlier" is exactly the origin, then
2626	 * snapshot_is_before(earlier, origin) will return false (because
2627	 * they're the same).
2628	 */
2629	if (origin_txg == earlier_txg &&
2630	    strcmp(origin->zfs_name, earlier->zfs_name) == 0) {
2631		zfs_close(earlier_dir);
2632		zfs_close(later_dir);
2633		zfs_close(origin);
2634		return (B_TRUE);
2635	}
2636	zfs_close(earlier_dir);
2637	zfs_close(later_dir);
2638
2639	ret = snapshot_is_before(earlier, origin);
2640	zfs_close(origin);
2641	return (ret);
2642}
2643
2644/*
2645 * The "zhp" argument is the handle of the dataset to send (typically a
2646 * snapshot).  The "from" argument is the full name of the snapshot or
2647 * bookmark that is the incremental source.
2648 *
2649 * Pre-wrapped (cf. lzc_send_wrapper()).
2650 */
2651static int
2652zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
2653    sendflags_t *flags, const char *redactbook)
2654{
2655	int err;
2656	libzfs_handle_t *hdl = zhp->zfs_hdl;
2657	char *name = zhp->zfs_name;
2658	pthread_t ptid;
2659	progress_arg_t pa = { 0 };
2660	uint64_t size = 0;
2661
2662	char errbuf[ERRBUFLEN];
2663	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2664	    "warning: cannot send '%s'"), name);
2665
2666	if (from != NULL && strchr(from, '@')) {
2667		zfs_handle_t *from_zhp = zfs_open(hdl, from,
2668		    ZFS_TYPE_DATASET);
2669		if (from_zhp == NULL)
2670			return (-1);
2671		if (!snapshot_is_before(from_zhp, zhp)) {
2672			zfs_close(from_zhp);
2673			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2674			    "not an earlier snapshot from the same fs"));
2675			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2676		}
2677		zfs_close(from_zhp);
2678	}
2679
2680	if (redactbook != NULL) {
2681		char bookname[ZFS_MAX_DATASET_NAME_LEN];
2682		nvlist_t *redact_snaps;
2683		zfs_handle_t *book_zhp;
2684		char *at, *pound;
2685		int dsnamelen;
2686
2687		pound = strchr(redactbook, '#');
2688		if (pound != NULL)
2689			redactbook = pound + 1;
2690		at = strchr(name, '@');
2691		if (at == NULL) {
2692			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2693			    "cannot do a redacted send to a filesystem"));
2694			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2695		}
2696		dsnamelen = at - name;
2697		if (snprintf(bookname, sizeof (bookname), "%.*s#%s",
2698		    dsnamelen, name, redactbook)
2699		    >= sizeof (bookname)) {
2700			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2701			    "invalid bookmark name"));
2702			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2703		}
2704		book_zhp = zfs_open(hdl, bookname, ZFS_TYPE_BOOKMARK);
2705		if (book_zhp == NULL)
2706			return (-1);
2707		if (nvlist_lookup_nvlist(book_zhp->zfs_props,
2708		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
2709		    &redact_snaps) != 0 || redact_snaps == NULL) {
2710			zfs_close(book_zhp);
2711			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2712			    "not a redaction bookmark"));
2713			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2714		}
2715		zfs_close(book_zhp);
2716	}
2717
2718	/*
2719	 * Send fs properties
2720	 */
2721	if (flags->props || flags->holds || flags->backup) {
2722		/*
2723		 * Note: the header generated by send_prelim_records()
2724		 * assumes that the incremental source is in the same
2725		 * filesystem/volume as the target (which is a requirement
2726		 * when doing "zfs send -R").  But that isn't always the
2727		 * case here (e.g. send from snap in origin, or send from
2728		 * bookmark).  We pass from=NULL, which will omit this
2729		 * information from the prelim records; it isn't used
2730		 * when receiving this type of stream.
2731		 */
2732		err = send_prelim_records(zhp, NULL, fd, B_TRUE, B_FALSE,
2733		    flags->verbosity > 0, flags->dryrun, flags->raw,
2734		    flags->replicate, B_FALSE, flags->backup, flags->holds,
2735		    flags->props, flags->doall, NULL, NULL);
2736		if (err != 0)
2737			return (err);
2738	}
2739
2740	/*
2741	 * Perform size estimate if verbose was specified.
2742	 */
2743	if (flags->verbosity != 0 || flags->progressastitle) {
2744		err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook,
2745		    errbuf, &size);
2746		if (err != 0)
2747			return (err);
2748	}
2749
2750	if (flags->dryrun)
2751		return (0);
2752
2753	/*
2754	 * If progress reporting is requested, spawn a new thread to poll
2755	 * ZFS_IOC_SEND_PROGRESS at a regular interval.
2756	 */
2757	sigset_t oldmask;
2758	{
2759		pa.pa_zhp = zhp;
2760		pa.pa_fd = fd;
2761		pa.pa_parsable = flags->parsable;
2762		pa.pa_estimate = B_FALSE;
2763		pa.pa_verbosity = flags->verbosity;
2764		pa.pa_size = size;
2765		pa.pa_astitle = flags->progressastitle;
2766		pa.pa_progress = flags->progress;
2767
2768		err = pthread_create(&ptid, NULL,
2769		    send_progress_thread, &pa);
2770		if (err != 0) {
2771			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(errno));
2772			return (zfs_error(zhp->zfs_hdl,
2773			    EZFS_THREADCREATEFAILED, errbuf));
2774		}
2775		SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
2776	}
2777
2778	err = lzc_send_redacted(name, from, fd,
2779	    lzc_flags_from_sendflags(flags), redactbook);
2780
2781	if (send_progress_thread_exit(hdl, ptid, &oldmask))
2782			return (-1);
2783
2784	if (err == 0 && (flags->props || flags->holds || flags->backup)) {
2785		/* Write the final end record. */
2786		err = send_conclusion_record(fd, NULL);
2787		if (err != 0)
2788			return (zfs_standard_error(hdl, err, errbuf));
2789	}
2790	if (err != 0) {
2791		switch (errno) {
2792		case EXDEV:
2793			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2794			    "not an earlier snapshot from the same fs"));
2795			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2796
2797		case ENOENT:
2798		case ESRCH:
2799			if (lzc_exists(name)) {
2800				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2801				    "incremental source (%s) does not exist"),
2802				    from);
2803			}
2804			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2805
2806		case EACCES:
2807			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2808			    "dataset key must be loaded"));
2809			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2810
2811		case EBUSY:
2812			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2813			    "target is busy; if a filesystem, "
2814			    "it must not be mounted"));
2815			return (zfs_error(hdl, EZFS_BUSY, errbuf));
2816
2817		case EDQUOT:
2818		case EFAULT:
2819		case EFBIG:
2820		case EINVAL:
2821		case EIO:
2822		case ENOLINK:
2823		case ENOSPC:
2824		case ENOSTR:
2825		case ENXIO:
2826		case EPIPE:
2827		case ERANGE:
2828		case EROFS:
2829			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
2830			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2831
2832		default:
2833			return (zfs_standard_error(hdl, errno, errbuf));
2834		}
2835	}
2836	return (err != 0);
2837}
2838
/*
 * Argument bundle passed from zfs_send_one() to zfs_send_one_cb() through
 * the opaque pointer of lzc_send_wrapper().  Each member holds the
 * zfs_send_one() parameter of the same name; the output descriptor is
 * supplied separately to the callback.
 */
struct zfs_send_one {
	zfs_handle_t *zhp;
	const char *from;
	sendflags_t *flags;
	const char *redactbook;
};
2845
2846static int
2847zfs_send_one_cb(int fd, void *arg)
2848{
2849	struct zfs_send_one *zso = arg;
2850	return (zfs_send_one_cb_impl(zso->zhp, zso->from, fd, zso->flags,
2851	    zso->redactbook));
2852}
2853
2854int
2855zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
2856    const char *redactbook)
2857{
2858	struct zfs_send_one zso = {
2859		.zhp = zhp,
2860		.from = from,
2861		.flags = flags,
2862		.redactbook = redactbook,
2863	};
2864	return (lzc_send_wrapper(zfs_send_one_cb, fd, &zso));
2865}
2866
2867/*
2868 * Routines specific to "zfs recv"
2869 */
2870
2871static int
2872recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2873    boolean_t byteswap, zio_cksum_t *zc)
2874{
2875	char *cp = buf;
2876	int rv;
2877	int len = ilen;
2878
2879	do {
2880		rv = read(fd, cp, len);
2881		cp += rv;
2882		len -= rv;
2883	} while (rv > 0);
2884
2885	if (rv < 0 || len != 0) {
2886		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2887		    "failed to read from stream"));
2888		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2889		    "cannot receive")));
2890	}
2891
2892	if (zc) {
2893		if (byteswap)
2894			fletcher_4_incremental_byteswap(buf, ilen, zc);
2895		else
2896			fletcher_4_incremental_native(buf, ilen, zc);
2897	}
2898	return (0);
2899}
2900
2901static int
2902recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2903    boolean_t byteswap, zio_cksum_t *zc)
2904{
2905	char *buf;
2906	int err;
2907
2908	buf = zfs_alloc(hdl, len);
2909
2910	if (len > hdl->libzfs_max_nvlist) {
2911		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "nvlist too large"));
2912		free(buf);
2913		return (ENOMEM);
2914	}
2915
2916	err = recv_read(hdl, fd, buf, len, byteswap, zc);
2917	if (err != 0) {
2918		free(buf);
2919		return (err);
2920	}
2921
2922	err = nvlist_unpack(buf, len, nvp, 0);
2923	free(buf);
2924	if (err != 0) {
2925		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2926		    "stream (malformed nvlist)"));
2927		return (EINVAL);
2928	}
2929	return (0);
2930}
2931
2932/*
2933 * Returns the grand origin (origin of origin of origin...) of a given handle.
2934 * If this dataset is not a clone, it simply returns a copy of the original
2935 * handle.
2936 */
2937static zfs_handle_t *
2938recv_open_grand_origin(zfs_handle_t *zhp)
2939{
2940	char origin[ZFS_MAX_DATASET_NAME_LEN];
2941	zprop_source_t src;
2942	zfs_handle_t *ozhp = zfs_handle_dup(zhp);
2943
2944	while (ozhp != NULL) {
2945		if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
2946		    sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
2947			break;
2948
2949		(void) zfs_close(ozhp);
2950		ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
2951	}
2952
2953	return (ozhp);
2954}
2955
2956static int
2957recv_rename_impl(zfs_handle_t *zhp, const char *name, const char *newname)
2958{
2959	int err;
2960	zfs_handle_t *ozhp = NULL;
2961
2962	/*
2963	 * Attempt to rename the dataset. If it fails with EACCES we have
2964	 * attempted to rename the dataset outside of its encryption root.
2965	 * Force the dataset to become an encryption root and try again.
2966	 */
2967	err = lzc_rename(name, newname);
2968	if (err == EACCES) {
2969		ozhp = recv_open_grand_origin(zhp);
2970		if (ozhp == NULL) {
2971			err = ENOENT;
2972			goto out;
2973		}
2974
2975		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2976		    NULL, NULL, 0);
2977		if (err != 0)
2978			goto out;
2979
2980		err = lzc_rename(name, newname);
2981	}
2982
2983out:
2984	if (ozhp != NULL)
2985		zfs_close(ozhp);
2986	return (err);
2987}
2988
2989static int
2990recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2991    int baselen, char *newname, recvflags_t *flags)
2992{
2993	static int seq;
2994	int err;
2995	prop_changelist_t *clp = NULL;
2996	zfs_handle_t *zhp = NULL;
2997
2998	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2999	if (zhp == NULL) {
3000		err = -1;
3001		goto out;
3002	}
3003	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3004	    flags->force ? MS_FORCE : 0);
3005	if (clp == NULL) {
3006		err = -1;
3007		goto out;
3008	}
3009	err = changelist_prefix(clp);
3010	if (err)
3011		goto out;
3012
3013	if (tryname) {
3014		(void) strlcpy(newname, tryname, ZFS_MAX_DATASET_NAME_LEN);
3015		if (flags->verbose) {
3016			(void) printf("attempting rename %s to %s\n",
3017			    name, newname);
3018		}
3019		err = recv_rename_impl(zhp, name, newname);
3020		if (err == 0)
3021			changelist_rename(clp, name, tryname);
3022	} else {
3023		err = ENOENT;
3024	}
3025
3026	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
3027		seq++;
3028
3029		(void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
3030		    "%.*srecv-%u-%u", baselen, name, getpid(), seq);
3031
3032		if (flags->verbose) {
3033			(void) printf("failed - trying rename %s to %s\n",
3034			    name, newname);
3035		}
3036		err = recv_rename_impl(zhp, name, newname);
3037		if (err == 0)
3038			changelist_rename(clp, name, newname);
3039		if (err && flags->verbose) {
3040			(void) printf("failed (%u) - "
3041			    "will try again on next pass\n", errno);
3042		}
3043		err = EAGAIN;
3044	} else if (flags->verbose) {
3045		if (err == 0)
3046			(void) printf("success\n");
3047		else
3048			(void) printf("failed (%u)\n", errno);
3049	}
3050
3051	(void) changelist_postfix(clp);
3052
3053out:
3054	if (clp != NULL)
3055		changelist_free(clp);
3056	if (zhp != NULL)
3057		zfs_close(zhp);
3058
3059	return (err);
3060}
3061
3062static int
3063recv_promote(libzfs_handle_t *hdl, const char *fsname,
3064    const char *origin_fsname, recvflags_t *flags)
3065{
3066	int err;
3067	zfs_cmd_t zc = {"\0"};
3068	zfs_handle_t *zhp = NULL, *ozhp = NULL;
3069
3070	if (flags->verbose)
3071		(void) printf("promoting %s\n", fsname);
3072
3073	(void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
3074	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
3075
3076	/*
3077	 * Attempt to promote the dataset. If it fails with EACCES the
3078	 * promotion would cause this dataset to leave its encryption root.
3079	 * Force the origin to become an encryption root and try again.
3080	 */
3081	err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3082	if (err == EACCES) {
3083		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3084		if (zhp == NULL) {
3085			err = -1;
3086			goto out;
3087		}
3088
3089		ozhp = recv_open_grand_origin(zhp);
3090		if (ozhp == NULL) {
3091			err = -1;
3092			goto out;
3093		}
3094
3095		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
3096		    NULL, NULL, 0);
3097		if (err != 0)
3098			goto out;
3099
3100		err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3101	}
3102
3103out:
3104	if (zhp != NULL)
3105		zfs_close(zhp);
3106	if (ozhp != NULL)
3107		zfs_close(ozhp);
3108
3109	return (err);
3110}
3111
3112static int
3113recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
3114    char *newname, recvflags_t *flags)
3115{
3116	int err = 0;
3117	prop_changelist_t *clp;
3118	zfs_handle_t *zhp;
3119	boolean_t defer = B_FALSE;
3120	int spa_version;
3121
3122	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
3123	if (zhp == NULL)
3124		return (-1);
3125	zfs_type_t type = zfs_get_type(zhp);
3126	if (type == ZFS_TYPE_SNAPSHOT &&
3127	    zfs_spa_version(zhp, &spa_version) == 0 &&
3128	    spa_version >= SPA_VERSION_USERREFS)
3129		defer = B_TRUE;
3130	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3131	    flags->force ? MS_FORCE : 0);
3132	zfs_close(zhp);
3133	if (clp == NULL)
3134		return (-1);
3135
3136	err = changelist_prefix(clp);
3137	if (err)
3138		return (err);
3139
3140	if (flags->verbose)
3141		(void) printf("attempting destroy %s\n", name);
3142	if (type == ZFS_TYPE_SNAPSHOT) {
3143		nvlist_t *nv = fnvlist_alloc();
3144		fnvlist_add_boolean(nv, name);
3145		err = lzc_destroy_snaps(nv, defer, NULL);
3146		fnvlist_free(nv);
3147	} else {
3148		err = lzc_destroy(name);
3149	}
3150	if (err == 0) {
3151		if (flags->verbose)
3152			(void) printf("success\n");
3153		changelist_remove(clp, name);
3154	}
3155
3156	(void) changelist_postfix(clp);
3157	changelist_free(clp);
3158
3159	/*
3160	 * Deferred destroy might destroy the snapshot or only mark it to be
3161	 * destroyed later, and it returns success in either case.
3162	 */
3163	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
3164	    ZFS_TYPE_SNAPSHOT))) {
3165		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
3166	}
3167
3168	return (err);
3169}
3170
/*
 * Search state shared by guid_to_name_redact_snaps() and its iteration
 * callback guid_to_name_cb().
 */
typedef struct guid_to_name_data {
	/* guid being searched for; compared against each handle's guid */
	uint64_t guid;
	/* when set, bookmarks are iterated in addition to children */
	boolean_t bookmark_ok;
	/* caller-supplied output buffer; receives the matching name */
	char *name;
	/* last path component of a subtree to skip, or NULL for none */
	char *skip;
	/* guids that a matching bookmark's redaction list must contain */
	uint64_t *redact_snap_guids;
	/* length of redact_snap_guids; -1 disables the redaction check */
	uint64_t num_redact_snaps;
} guid_to_name_data_t;
3179
3180static boolean_t
3181redact_snaps_match(zfs_handle_t *zhp, guid_to_name_data_t *gtnd)
3182{
3183	uint64_t *bmark_snaps;
3184	uint_t bmark_num_snaps;
3185	nvlist_t *nvl;
3186	if (zhp->zfs_type != ZFS_TYPE_BOOKMARK)
3187		return (B_FALSE);
3188
3189	nvl = fnvlist_lookup_nvlist(zhp->zfs_props,
3190	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
3191	bmark_snaps = fnvlist_lookup_uint64_array(nvl, ZPROP_VALUE,
3192	    &bmark_num_snaps);
3193	if (bmark_num_snaps != gtnd->num_redact_snaps)
3194		return (B_FALSE);
3195	int i = 0;
3196	for (; i < bmark_num_snaps; i++) {
3197		int j = 0;
3198		for (; j < bmark_num_snaps; j++) {
3199			if (bmark_snaps[i] == gtnd->redact_snap_guids[j])
3200				break;
3201		}
3202		if (j == bmark_num_snaps)
3203			break;
3204	}
3205	return (i == bmark_num_snaps);
3206}
3207
3208static int
3209guid_to_name_cb(zfs_handle_t *zhp, void *arg)
3210{
3211	guid_to_name_data_t *gtnd = arg;
3212	const char *slash;
3213	int err;
3214
3215	if (gtnd->skip != NULL &&
3216	    (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
3217	    strcmp(slash + 1, gtnd->skip) == 0) {
3218		zfs_close(zhp);
3219		return (0);
3220	}
3221
3222	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid &&
3223	    (gtnd->num_redact_snaps == -1 || redact_snaps_match(zhp, gtnd))) {
3224		(void) strcpy(gtnd->name, zhp->zfs_name);
3225		zfs_close(zhp);
3226		return (EEXIST);
3227	}
3228
3229	err = zfs_iter_children_v2(zhp, 0, guid_to_name_cb, gtnd);
3230	if (err != EEXIST && gtnd->bookmark_ok)
3231		err = zfs_iter_bookmarks_v2(zhp, 0, guid_to_name_cb, gtnd);
3232	zfs_close(zhp);
3233	return (err);
3234}
3235
3236/*
3237 * Attempt to find the local dataset associated with this guid.  In the case of
3238 * multiple matches, we attempt to find the "best" match by searching
3239 * progressively larger portions of the hierarchy.  This allows one to send a
3240 * tree of datasets individually and guarantee that we will find the source
3241 * guid within that hierarchy, even if there are multiple matches elsewhere.
3242 *
3243 * If num_redact_snaps is not -1, we attempt to find a redaction bookmark with
3244 * the specified number of redaction snapshots.  If num_redact_snaps isn't 0 or
3245 * -1, then redact_snap_guids will be an array of the guids of the snapshots the
3246 * redaction bookmark was created with.  If num_redact_snaps is -1, then we will
3247 * attempt to find a snapshot or bookmark (if bookmark_ok is passed) with the
3248 * given guid.  Note that a redaction bookmark can be returned if
3249 * num_redact_snaps == -1.
3250 */
3251static int
3252guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
3253    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
3254    uint64_t num_redact_snaps, char *name)
3255{
3256	char pname[ZFS_MAX_DATASET_NAME_LEN];
3257	guid_to_name_data_t gtnd;
3258
3259	gtnd.guid = guid;
3260	gtnd.bookmark_ok = bookmark_ok;
3261	gtnd.name = name;
3262	gtnd.skip = NULL;
3263	gtnd.redact_snap_guids = redact_snap_guids;
3264	gtnd.num_redact_snaps = num_redact_snaps;
3265
3266	/*
3267	 * Search progressively larger portions of the hierarchy, starting
3268	 * with the filesystem specified by 'parent'.  This will
3269	 * select the "most local" version of the origin snapshot in the case
3270	 * that there are multiple matching snapshots in the system.
3271	 */
3272	(void) strlcpy(pname, parent, sizeof (pname));
3273	char *cp = strrchr(pname, '@');
3274	if (cp == NULL)
3275		cp = strchr(pname, '\0');
3276	for (; cp != NULL; cp = strrchr(pname, '/')) {
3277		/* Chop off the last component and open the parent */
3278		*cp = '\0';
3279		zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
3280
3281		if (zhp == NULL)
3282			continue;
3283		int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
3284		if (err != EEXIST)
3285			err = zfs_iter_children_v2(zhp, 0, guid_to_name_cb,
3286			    &gtnd);
3287		if (err != EEXIST && bookmark_ok)
3288			err = zfs_iter_bookmarks_v2(zhp, 0, guid_to_name_cb,
3289			    &gtnd);
3290		zfs_close(zhp);
3291		if (err == EEXIST)
3292			return (0);
3293
3294		/*
3295		 * Remember the last portion of the dataset so we skip it next
3296		 * time through (as we've already searched that portion of the
3297		 * hierarchy).
3298		 */
3299		gtnd.skip = strrchr(pname, '/') + 1;
3300	}
3301
3302	return (ENOENT);
3303}
3304
3305static int
3306guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
3307    boolean_t bookmark_ok, char *name)
3308{
3309	return (guid_to_name_redact_snaps(hdl, parent, guid, bookmark_ok, NULL,
3310	    -1, name));
3311}
3312
3313/*
3314 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
3315 * guid1 is after guid2.
3316 */
3317static int
3318created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
3319    uint64_t guid1, uint64_t guid2)
3320{
3321	nvlist_t *nvfs;
3322	const char *fsname = NULL, *snapname = NULL;
3323	char buf[ZFS_MAX_DATASET_NAME_LEN];
3324	int rv;
3325	zfs_handle_t *guid1hdl, *guid2hdl;
3326	uint64_t create1, create2;
3327
3328	if (guid2 == 0)
3329		return (0);
3330	if (guid1 == 0)
3331		return (1);
3332
3333	nvfs = fsavl_find(avl, guid1, &snapname);
3334	fsname = fnvlist_lookup_string(nvfs, "name");
3335	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3336	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3337	if (guid1hdl == NULL)
3338		return (-1);
3339
3340	nvfs = fsavl_find(avl, guid2, &snapname);
3341	fsname = fnvlist_lookup_string(nvfs, "name");
3342	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3343	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3344	if (guid2hdl == NULL) {
3345		zfs_close(guid1hdl);
3346		return (-1);
3347	}
3348
3349	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
3350	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
3351
3352	if (create1 < create2)
3353		rv = -1;
3354	else if (create1 > create2)
3355		rv = +1;
3356	else
3357		rv = 0;
3358
3359	zfs_close(guid1hdl);
3360	zfs_close(guid2hdl);
3361
3362	return (rv);
3363}
3364
3365/*
3366 * This function reestablishes the hierarchy of encryption roots after a
3367 * recursive incremental receive has completed. This must be done after the
3368 * second call to recv_incremental_replication() has renamed and promoted all
3369 * sent datasets to their final locations in the dataset hierarchy.
3370 */
3371static int
3372recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *top_zfs,
3373    nvlist_t *stream_nv)
3374{
3375	int err;
3376	nvpair_t *fselem = NULL;
3377	nvlist_t *stream_fss;
3378
3379	stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3380
3381	while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
3382		zfs_handle_t *zhp = NULL;
3383		uint64_t crypt;
3384		nvlist_t *snaps, *props, *stream_nvfs = NULL;
3385		nvpair_t *snapel = NULL;
3386		boolean_t is_encroot, is_clone, stream_encroot;
3387		char *cp;
3388		const char *stream_keylocation = NULL;
3389		char keylocation[MAXNAMELEN];
3390		char fsname[ZFS_MAX_DATASET_NAME_LEN];
3391
3392		keylocation[0] = '\0';
3393		stream_nvfs = fnvpair_value_nvlist(fselem);
3394		snaps = fnvlist_lookup_nvlist(stream_nvfs, "snaps");
3395		props = fnvlist_lookup_nvlist(stream_nvfs, "props");
3396		stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
3397
3398		/* find a snapshot from the stream that exists locally */
3399		err = ENOENT;
3400		while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
3401			uint64_t guid;
3402
3403			guid = fnvpair_value_uint64(snapel);
3404			err = guid_to_name(hdl, top_zfs, guid, B_FALSE,
3405			    fsname);
3406			if (err == 0)
3407				break;
3408		}
3409
3410		if (err != 0)
3411			continue;
3412
3413		cp = strchr(fsname, '@');
3414		if (cp != NULL)
3415			*cp = '\0';
3416
3417		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3418		if (zhp == NULL) {
3419			err = ENOENT;
3420			goto error;
3421		}
3422
3423		crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
3424		is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
3425		(void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
3426
3427		/* we don't need to do anything for unencrypted datasets */
3428		if (crypt == ZIO_CRYPT_OFF) {
3429			zfs_close(zhp);
3430			continue;
3431		}
3432
3433		/*
3434		 * If the dataset is flagged as an encryption root, was not
3435		 * received as a clone and is not currently an encryption root,
3436		 * force it to become one. Fixup the keylocation if necessary.
3437		 */
3438		if (stream_encroot) {
3439			if (!is_clone && !is_encroot) {
3440				err = lzc_change_key(fsname,
3441				    DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
3442				if (err != 0) {
3443					zfs_close(zhp);
3444					goto error;
3445				}
3446			}
3447
3448			stream_keylocation = fnvlist_lookup_string(props,
3449			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
3450
3451			/*
3452			 * Refresh the properties in case the call to
3453			 * lzc_change_key() changed the value.
3454			 */
3455			zfs_refresh_properties(zhp);
3456			err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
3457			    keylocation, sizeof (keylocation), NULL, NULL,
3458			    0, B_TRUE);
3459			if (err != 0) {
3460				zfs_close(zhp);
3461				goto error;
3462			}
3463
3464			if (strcmp(keylocation, stream_keylocation) != 0) {
3465				err = zfs_prop_set(zhp,
3466				    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
3467				    stream_keylocation);
3468				if (err != 0) {
3469					zfs_close(zhp);
3470					goto error;
3471				}
3472			}
3473		}
3474
3475		/*
3476		 * If the dataset is not flagged as an encryption root and is
3477		 * currently an encryption root, force it to inherit from its
3478		 * parent. The root of a raw send should never be
3479		 * force-inherited.
3480		 */
3481		if (!stream_encroot && is_encroot &&
3482		    strcmp(top_zfs, fsname) != 0) {
3483			err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
3484			    NULL, NULL, 0);
3485			if (err != 0) {
3486				zfs_close(zhp);
3487				goto error;
3488			}
3489		}
3490
3491		zfs_close(zhp);
3492	}
3493
3494	return (0);
3495
3496error:
3497	return (err);
3498}
3499
3500static int
3501recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
3502    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3503    nvlist_t *renamed)
3504{
3505	nvlist_t *local_nv, *deleted = NULL;
3506	avl_tree_t *local_avl;
3507	nvpair_t *fselem, *nextfselem;
3508	const char *fromsnap;
3509	char newname[ZFS_MAX_DATASET_NAME_LEN];
3510	char guidname[32];
3511	int error;
3512	boolean_t needagain, progress, recursive;
3513	const char *s1, *s2;
3514
3515	fromsnap = fnvlist_lookup_string(stream_nv, "fromsnap");
3516
3517	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3518	    ENOENT);
3519
3520	if (flags->dryrun)
3521		return (0);
3522
3523again:
3524	needagain = progress = B_FALSE;
3525
3526	deleted = fnvlist_alloc();
3527
3528	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
3529	    recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE, B_FALSE,
3530	    B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0)
3531		return (error);
3532
3533	/*
3534	 * Process deletes and renames
3535	 */
3536	for (fselem = nvlist_next_nvpair(local_nv, NULL);
3537	    fselem; fselem = nextfselem) {
3538		nvlist_t *nvfs, *snaps;
3539		nvlist_t *stream_nvfs = NULL;
3540		nvpair_t *snapelem, *nextsnapelem;
3541		uint64_t fromguid = 0;
3542		uint64_t originguid = 0;
3543		uint64_t stream_originguid = 0;
3544		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
3545		const char *fsname, *stream_fsname;
3546
3547		nextfselem = nvlist_next_nvpair(local_nv, fselem);
3548
3549		nvfs = fnvpair_value_nvlist(fselem);
3550		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
3551		fsname = fnvlist_lookup_string(nvfs, "name");
3552		parent_fromsnap_guid = fnvlist_lookup_uint64(nvfs,
3553		    "parentfromsnap");
3554		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
3555
3556		/*
3557		 * First find the stream's fs, so we can check for
3558		 * a different origin (due to "zfs promote")
3559		 */
3560		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3561		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
3562			uint64_t thisguid;
3563
3564			thisguid = fnvpair_value_uint64(snapelem);
3565			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
3566
3567			if (stream_nvfs != NULL)
3568				break;
3569		}
3570
3571		/* check for promote */
3572		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
3573		    &stream_originguid);
3574		if (stream_nvfs && originguid != stream_originguid) {
3575			switch (created_before(hdl, local_avl,
3576			    stream_originguid, originguid)) {
3577			case 1: {
3578				/* promote it! */
3579				nvlist_t *origin_nvfs;
3580				const char *origin_fsname;
3581
3582				origin_nvfs = fsavl_find(local_avl, originguid,
3583				    NULL);
3584				origin_fsname = fnvlist_lookup_string(
3585				    origin_nvfs, "name");
3586				error = recv_promote(hdl, fsname, origin_fsname,
3587				    flags);
3588				if (error == 0)
3589					progress = B_TRUE;
3590				break;
3591			}
3592			default:
3593				break;
3594			case -1:
3595				fsavl_destroy(local_avl);
3596				fnvlist_free(local_nv);
3597				return (-1);
3598			}
3599			/*
3600			 * We had/have the wrong origin, therefore our
3601			 * list of snapshots is wrong.  Need to handle
3602			 * them on the next pass.
3603			 */
3604			needagain = B_TRUE;
3605			continue;
3606		}
3607
3608		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3609		    snapelem; snapelem = nextsnapelem) {
3610			uint64_t thisguid;
3611			const char *stream_snapname;
3612			nvlist_t *found, *props;
3613
3614			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
3615
3616			thisguid = fnvpair_value_uint64(snapelem);
3617			found = fsavl_find(stream_avl, thisguid,
3618			    &stream_snapname);
3619
3620			/* check for delete */
3621			if (found == NULL) {
3622				char name[ZFS_MAX_DATASET_NAME_LEN];
3623
3624				if (!flags->force)
3625					continue;
3626
3627				(void) snprintf(name, sizeof (name), "%s@%s",
3628				    fsname, nvpair_name(snapelem));
3629
3630				error = recv_destroy(hdl, name,
3631				    strlen(fsname)+1, newname, flags);
3632				if (error)
3633					needagain = B_TRUE;
3634				else
3635					progress = B_TRUE;
3636				sprintf(guidname, "%llu",
3637				    (u_longlong_t)thisguid);
3638				nvlist_add_boolean(deleted, guidname);
3639				continue;
3640			}
3641
3642			stream_nvfs = found;
3643
3644			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
3645			    &props) && 0 == nvlist_lookup_nvlist(props,
3646			    stream_snapname, &props)) {
3647				zfs_cmd_t zc = {"\0"};
3648
3649				zc.zc_cookie = B_TRUE; /* received */
3650				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
3651				    "%s@%s", fsname, nvpair_name(snapelem));
3652				zcmd_write_src_nvlist(hdl, &zc, props);
3653				(void) zfs_ioctl(hdl,
3654				    ZFS_IOC_SET_PROP, &zc);
3655				zcmd_free_nvlists(&zc);
3656			}
3657
3658			/* check for different snapname */
3659			if (strcmp(nvpair_name(snapelem),
3660			    stream_snapname) != 0) {
3661				char name[ZFS_MAX_DATASET_NAME_LEN];
3662				char tryname[ZFS_MAX_DATASET_NAME_LEN];
3663
3664				(void) snprintf(name, sizeof (name), "%s@%s",
3665				    fsname, nvpair_name(snapelem));
3666				(void) snprintf(tryname, sizeof (name), "%s@%s",
3667				    fsname, stream_snapname);
3668
3669				error = recv_rename(hdl, name, tryname,
3670				    strlen(fsname)+1, newname, flags);
3671				if (error)
3672					needagain = B_TRUE;
3673				else
3674					progress = B_TRUE;
3675			}
3676
3677			if (strcmp(stream_snapname, fromsnap) == 0)
3678				fromguid = thisguid;
3679		}
3680
3681		/* check for delete */
3682		if (stream_nvfs == NULL) {
3683			if (!flags->force)
3684				continue;
3685
3686			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
3687			    newname, flags);
3688			if (error)
3689				needagain = B_TRUE;
3690			else
3691				progress = B_TRUE;
3692			sprintf(guidname, "%llu",
3693			    (u_longlong_t)parent_fromsnap_guid);
3694			nvlist_add_boolean(deleted, guidname);
3695			continue;
3696		}
3697
3698		if (fromguid == 0) {
3699			if (flags->verbose) {
3700				(void) printf("local fs %s does not have "
3701				    "fromsnap (%s in stream); must have "
3702				    "been deleted locally; ignoring\n",
3703				    fsname, fromsnap);
3704			}
3705			continue;
3706		}
3707
3708		stream_fsname = fnvlist_lookup_string(stream_nvfs, "name");
3709		stream_parent_fromsnap_guid = fnvlist_lookup_uint64(
3710		    stream_nvfs, "parentfromsnap");
3711
3712		s1 = strrchr(fsname, '/');
3713		s2 = strrchr(stream_fsname, '/');
3714
3715		/*
3716		 * Check if we're going to rename based on parent guid change
3717		 * and the current parent guid was also deleted. If it was then
3718		 * rename will fail and is likely unneeded, so avoid this and
3719		 * force an early retry to determine the new
3720		 * parent_fromsnap_guid.
3721		 */
3722		if (stream_parent_fromsnap_guid != 0 &&
3723		    parent_fromsnap_guid != 0 &&
3724		    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
3725			sprintf(guidname, "%llu",
3726			    (u_longlong_t)parent_fromsnap_guid);
3727			if (nvlist_exists(deleted, guidname)) {
3728				progress = B_TRUE;
3729				needagain = B_TRUE;
3730				goto doagain;
3731			}
3732		}
3733
3734		/*
3735		 * Check for rename. If the exact receive path is specified, it
3736		 * does not count as a rename, but we still need to check the
3737		 * datasets beneath it.
3738		 */
3739		if ((stream_parent_fromsnap_guid != 0 &&
3740		    parent_fromsnap_guid != 0 &&
3741		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
3742		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
3743		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
3744			nvlist_t *parent;
3745			char tryname[ZFS_MAX_DATASET_NAME_LEN];
3746
3747			parent = fsavl_find(local_avl,
3748			    stream_parent_fromsnap_guid, NULL);
3749			/*
3750			 * NB: parent might not be found if we used the
3751			 * tosnap for stream_parent_fromsnap_guid,
3752			 * because the parent is a newly-created fs;
3753			 * we'll be able to rename it after we recv the
3754			 * new fs.
3755			 */
3756			if (parent != NULL) {
3757				const char *pname;
3758
3759				pname = fnvlist_lookup_string(parent, "name");
3760				(void) snprintf(tryname, sizeof (tryname),
3761				    "%s%s", pname, strrchr(stream_fsname, '/'));
3762			} else {
3763				tryname[0] = '\0';
3764				if (flags->verbose) {
3765					(void) printf("local fs %s new parent "
3766					    "not found\n", fsname);
3767				}
3768			}
3769
3770			newname[0] = '\0';
3771
3772			error = recv_rename(hdl, fsname, tryname,
3773			    strlen(tofs)+1, newname, flags);
3774
3775			if (renamed != NULL && newname[0] != '\0') {
3776				fnvlist_add_boolean(renamed, newname);
3777			}
3778
3779			if (error)
3780				needagain = B_TRUE;
3781			else
3782				progress = B_TRUE;
3783		}
3784	}
3785
3786doagain:
3787	fsavl_destroy(local_avl);
3788	fnvlist_free(local_nv);
3789	fnvlist_free(deleted);
3790
3791	if (needagain && progress) {
3792		/* do another pass to fix up temporary names */
3793		if (flags->verbose)
3794			(void) printf("another pass:\n");
3795		goto again;
3796	}
3797
3798	return (needagain || error != 0);
3799}
3800
3801static int
3802zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
3803    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
3804    char **top_zfs, nvlist_t *cmdprops)
3805{
3806	nvlist_t *stream_nv = NULL;
3807	avl_tree_t *stream_avl = NULL;
3808	const char *fromsnap = NULL;
3809	const char *sendsnap = NULL;
3810	char *cp;
3811	char tofs[ZFS_MAX_DATASET_NAME_LEN];
3812	char sendfs[ZFS_MAX_DATASET_NAME_LEN];
3813	char errbuf[ERRBUFLEN];
3814	dmu_replay_record_t drre;
3815	int error;
3816	boolean_t anyerr = B_FALSE;
3817	boolean_t softerr = B_FALSE;
3818	boolean_t recursive, raw;
3819
3820	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3821	    "cannot receive"));
3822
3823	assert(drr->drr_type == DRR_BEGIN);
3824	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
3825	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
3826	    DMU_COMPOUNDSTREAM);
3827
3828	/*
3829	 * Read in the nvlist from the stream.
3830	 */
3831	if (drr->drr_payloadlen != 0) {
3832		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
3833		    &stream_nv, flags->byteswap, zc);
3834		if (error) {
3835			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3836			goto out;
3837		}
3838	}
3839
3840	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3841	    ENOENT);
3842	raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
3843
3844	if (recursive && strchr(destname, '@')) {
3845		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3846		    "cannot specify snapshot name for multi-snapshot stream"));
3847		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3848		goto out;
3849	}
3850
3851	/*
3852	 * Read in the end record and verify checksum.
3853	 */
3854	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
3855	    flags->byteswap, NULL)))
3856		goto out;
3857	if (flags->byteswap) {
3858		drre.drr_type = BSWAP_32(drre.drr_type);
3859		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
3860		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
3861		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
3862		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
3863		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
3864		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
3865		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
3866		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
3867	}
3868	if (drre.drr_type != DRR_END) {
3869		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3870		goto out;
3871	}
3872	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
3873		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3874		    "incorrect header checksum"));
3875		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3876		goto out;
3877	}
3878
3879	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
3880
3881	if (drr->drr_payloadlen != 0) {
3882		nvlist_t *stream_fss;
3883
3884		stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3885		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
3886			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3887			    "couldn't allocate avl tree"));
3888			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
3889			goto out;
3890		}
3891
3892		if (fromsnap != NULL && recursive) {
3893			nvlist_t *renamed = NULL;
3894			nvpair_t *pair = NULL;
3895
3896			(void) strlcpy(tofs, destname, sizeof (tofs));
3897			if (flags->isprefix) {
3898				struct drr_begin *drrb = &drr->drr_u.drr_begin;
3899				int i;
3900
3901				if (flags->istail) {
3902					cp = strrchr(drrb->drr_toname, '/');
3903					if (cp == NULL) {
3904						(void) strlcat(tofs, "/",
3905						    sizeof (tofs));
3906						i = 0;
3907					} else {
3908						i = (cp - drrb->drr_toname);
3909					}
3910				} else {
3911					i = strcspn(drrb->drr_toname, "/@");
3912				}
3913				/* zfs_receive_one() will create_parents() */
3914				(void) strlcat(tofs, &drrb->drr_toname[i],
3915				    sizeof (tofs));
3916				*strchr(tofs, '@') = '\0';
3917			}
3918
3919			if (!flags->dryrun && !flags->nomount) {
3920				renamed = fnvlist_alloc();
3921			}
3922
3923			softerr = recv_incremental_replication(hdl, tofs, flags,
3924			    stream_nv, stream_avl, renamed);
3925
3926			/* Unmount renamed filesystems before receiving. */
3927			while ((pair = nvlist_next_nvpair(renamed,
3928			    pair)) != NULL) {
3929				zfs_handle_t *zhp;
3930				prop_changelist_t *clp = NULL;
3931
3932				zhp = zfs_open(hdl, nvpair_name(pair),
3933				    ZFS_TYPE_FILESYSTEM);
3934				if (zhp != NULL) {
3935					clp = changelist_gather(zhp,
3936					    ZFS_PROP_MOUNTPOINT, 0,
3937					    flags->forceunmount ? MS_FORCE : 0);
3938					zfs_close(zhp);
3939					if (clp != NULL) {
3940						softerr |=
3941						    changelist_prefix(clp);
3942						changelist_free(clp);
3943					}
3944				}
3945			}
3946
3947			fnvlist_free(renamed);
3948		}
3949	}
3950
3951	/*
3952	 * Get the fs specified by the first path in the stream (the top level
3953	 * specified by 'zfs send') and pass it to each invocation of
3954	 * zfs_receive_one().
3955	 */
3956	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
3957	    sizeof (sendfs));
3958	if ((cp = strchr(sendfs, '@')) != NULL) {
3959		*cp = '\0';
3960		/*
3961		 * Find the "sendsnap", the final snapshot in a replication
3962		 * stream.  zfs_receive_one() handles certain errors
3963		 * differently, depending on if the contained stream is the
3964		 * last one or not.
3965		 */
3966		sendsnap = (cp + 1);
3967	}
3968
3969	/* Finally, receive each contained stream */
3970	do {
3971		/*
3972		 * we should figure out if it has a recoverable
3973		 * error, in which case do a recv_skip() and drive on.
3974		 * Note, if we fail due to already having this guid,
3975		 * zfs_receive_one() will take care of it (ie,
3976		 * recv_skip() and return 0).
3977		 */
3978		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
3979		    sendfs, stream_nv, stream_avl, top_zfs, sendsnap, cmdprops);
3980		if (error == ENODATA) {
3981			error = 0;
3982			break;
3983		}
3984		anyerr |= error;
3985	} while (error == 0);
3986
3987	if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
3988		/*
3989		 * Now that we have the fs's they sent us, try the
3990		 * renames again.
3991		 */
3992		softerr = recv_incremental_replication(hdl, tofs, flags,
3993		    stream_nv, stream_avl, NULL);
3994	}
3995
3996	if (raw && softerr == 0 && *top_zfs != NULL) {
3997		softerr = recv_fix_encryption_hierarchy(hdl, *top_zfs,
3998		    stream_nv);
3999	}
4000
4001out:
4002	fsavl_destroy(stream_avl);
4003	fnvlist_free(stream_nv);
4004	if (softerr)
4005		error = -2;
4006	if (anyerr)
4007		error = -1;
4008	return (error);
4009}
4010
4011static void
4012trunc_prop_errs(int truncated)
4013{
4014	ASSERT(truncated != 0);
4015
4016	if (truncated == 1)
4017		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4018		    "1 more property could not be set\n"));
4019	else
4020		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4021		    "%d more properties could not be set\n"), truncated);
4022}
4023
4024static int
4025recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
4026{
4027	dmu_replay_record_t *drr;
4028	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
4029	uint64_t payload_size;
4030	char errbuf[ERRBUFLEN];
4031
4032	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4033	    "cannot receive"));
4034
4035	/* XXX would be great to use lseek if possible... */
4036	drr = buf;
4037
4038	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
4039	    byteswap, NULL) == 0) {
4040		if (byteswap)
4041			drr->drr_type = BSWAP_32(drr->drr_type);
4042
4043		switch (drr->drr_type) {
4044		case DRR_BEGIN:
4045			if (drr->drr_payloadlen != 0) {
4046				(void) recv_read(hdl, fd, buf,
4047				    drr->drr_payloadlen, B_FALSE, NULL);
4048			}
4049			break;
4050
4051		case DRR_END:
4052			free(buf);
4053			return (0);
4054
4055		case DRR_OBJECT:
4056			if (byteswap) {
4057				drr->drr_u.drr_object.drr_bonuslen =
4058				    BSWAP_32(drr->drr_u.drr_object.
4059				    drr_bonuslen);
4060				drr->drr_u.drr_object.drr_raw_bonuslen =
4061				    BSWAP_32(drr->drr_u.drr_object.
4062				    drr_raw_bonuslen);
4063			}
4064
4065			payload_size =
4066			    DRR_OBJECT_PAYLOAD_SIZE(&drr->drr_u.drr_object);
4067			(void) recv_read(hdl, fd, buf, payload_size,
4068			    B_FALSE, NULL);
4069			break;
4070
4071		case DRR_WRITE:
4072			if (byteswap) {
4073				drr->drr_u.drr_write.drr_logical_size =
4074				    BSWAP_64(
4075				    drr->drr_u.drr_write.drr_logical_size);
4076				drr->drr_u.drr_write.drr_compressed_size =
4077				    BSWAP_64(
4078				    drr->drr_u.drr_write.drr_compressed_size);
4079			}
4080			payload_size =
4081			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
4082			assert(payload_size <= SPA_MAXBLOCKSIZE);
4083			(void) recv_read(hdl, fd, buf,
4084			    payload_size, B_FALSE, NULL);
4085			break;
4086		case DRR_SPILL:
4087			if (byteswap) {
4088				drr->drr_u.drr_spill.drr_length =
4089				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
4090				drr->drr_u.drr_spill.drr_compressed_size =
4091				    BSWAP_64(drr->drr_u.drr_spill.
4092				    drr_compressed_size);
4093			}
4094
4095			payload_size =
4096			    DRR_SPILL_PAYLOAD_SIZE(&drr->drr_u.drr_spill);
4097			(void) recv_read(hdl, fd, buf, payload_size,
4098			    B_FALSE, NULL);
4099			break;
4100		case DRR_WRITE_EMBEDDED:
4101			if (byteswap) {
4102				drr->drr_u.drr_write_embedded.drr_psize =
4103				    BSWAP_32(drr->drr_u.drr_write_embedded.
4104				    drr_psize);
4105			}
4106			(void) recv_read(hdl, fd, buf,
4107			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
4108			    8), B_FALSE, NULL);
4109			break;
4110		case DRR_OBJECT_RANGE:
4111		case DRR_WRITE_BYREF:
4112		case DRR_FREEOBJECTS:
4113		case DRR_FREE:
4114			break;
4115
4116		default:
4117			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4118			    "invalid record type"));
4119			free(buf);
4120			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4121		}
4122	}
4123
4124	free(buf);
4125	return (-1);
4126}
4127
4128static void
4129recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
4130    boolean_t resumable, boolean_t checksum)
4131{
4132	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
4133
4134	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, (checksum ?
4135	    "checksum mismatch" : "incomplete stream")));
4136
4137	if (!resumable)
4138		return;
4139	(void) strlcpy(target_fs, target_snap, sizeof (target_fs));
4140	*strchr(target_fs, '@') = '\0';
4141	zfs_handle_t *zhp = zfs_open(hdl, target_fs,
4142	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
4143	if (zhp == NULL)
4144		return;
4145
4146	char token_buf[ZFS_MAXPROPLEN];
4147	int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
4148	    token_buf, sizeof (token_buf),
4149	    NULL, NULL, 0, B_TRUE);
4150	if (error == 0) {
4151		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4152		    "checksum mismatch or incomplete stream.\n"
4153		    "Partially received snapshot is saved.\n"
4154		    "A resuming stream can be generated on the sending "
4155		    "system by running:\n"
4156		    "    zfs send -t %s"),
4157		    token_buf);
4158	}
4159	zfs_close(zhp);
4160}
4161
4162/*
4163 * Prepare a new nvlist of properties that are to override (-o) or be excluded
4164 * (-x) from the received dataset
4165 * recvprops: received properties from the send stream
4166 * cmdprops: raw input properties from command line
4167 * origprops: properties, both locally-set and received, currently set on the
4168 *            target dataset if it exists, NULL otherwise.
4169 * oxprops: valid output override (-o) and excluded (-x) properties
4170 */
4171static int
4172zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type,
4173    char *fsname, boolean_t zoned, boolean_t recursive, boolean_t newfs,
4174    boolean_t raw, boolean_t toplevel, nvlist_t *recvprops, nvlist_t *cmdprops,
4175    nvlist_t *origprops, nvlist_t **oxprops, uint8_t **wkeydata_out,
4176    uint_t *wkeylen_out, const char *errbuf)
4177{
4178	nvpair_t *nvp;
4179	nvlist_t *oprops, *voprops;
4180	zfs_handle_t *zhp = NULL;
4181	zpool_handle_t *zpool_hdl = NULL;
4182	char *cp;
4183	int ret = 0;
4184	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
4185
4186	if (nvlist_empty(cmdprops))
4187		return (0); /* No properties to override or exclude */
4188
4189	*oxprops = fnvlist_alloc();
4190	oprops = fnvlist_alloc();
4191
4192	strlcpy(namebuf, fsname, ZFS_MAX_DATASET_NAME_LEN);
4193
4194	/*
4195	 * Get our dataset handle. The target dataset may not exist yet.
4196	 */
4197	if (zfs_dataset_exists(hdl, namebuf, ZFS_TYPE_DATASET)) {
4198		zhp = zfs_open(hdl, namebuf, ZFS_TYPE_DATASET);
4199		if (zhp == NULL) {
4200			ret = -1;
4201			goto error;
4202		}
4203	}
4204
4205	/* open the zpool handle */
4206	cp = strchr(namebuf, '/');
4207	if (cp != NULL)
4208		*cp = '\0';
4209	zpool_hdl = zpool_open(hdl, namebuf);
4210	if (zpool_hdl == NULL) {
4211		ret = -1;
4212		goto error;
4213	}
4214
4215	/* restore namebuf to match fsname for later use */
4216	if (cp != NULL)
4217		*cp = '/';
4218
4219	/*
4220	 * first iteration: process excluded (-x) properties now and gather
4221	 * added (-o) properties to be later processed by zfs_valid_proplist()
4222	 */
4223	nvp = NULL;
4224	while ((nvp = nvlist_next_nvpair(cmdprops, nvp)) != NULL) {
4225		const char *name = nvpair_name(nvp);
4226		zfs_prop_t prop = zfs_name_to_prop(name);
4227
4228		/*
4229		 * It turns out, if we don't normalize "aliased" names
4230		 * e.g. compress= against the "real" names (e.g. compression)
4231		 * here, then setting/excluding them does not work as
4232		 * intended.
4233		 *
4234		 * But since user-defined properties wouldn't have a valid
4235		 * mapping here, we do this conditional dance.
4236		 */
4237		const char *newname = name;
4238		if (prop >= ZFS_PROP_TYPE)
4239			newname = zfs_prop_to_name(prop);
4240
4241		/* "origin" is processed separately, don't handle it here */
4242		if (prop == ZFS_PROP_ORIGIN)
4243			continue;
4244
4245		/* raw streams can't override encryption properties */
4246		if ((zfs_prop_encryption_key_param(prop) ||
4247		    prop == ZFS_PROP_ENCRYPTION) && raw) {
4248			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4249			    "encryption property '%s' cannot "
4250			    "be set or excluded for raw streams."), name);
4251			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4252			goto error;
4253		}
4254
4255		/*
4256		 * For plain replicated send, we can ignore encryption
4257		 * properties other than first stream
4258		 */
4259		if ((zfs_prop_encryption_key_param(prop) || prop ==
4260		    ZFS_PROP_ENCRYPTION) && !newfs && recursive && !raw) {
4261			continue;
4262		}
4263
4264		/* incremental streams can only exclude encryption properties */
4265		if ((zfs_prop_encryption_key_param(prop) ||
4266		    prop == ZFS_PROP_ENCRYPTION) && !newfs &&
4267		    nvpair_type(nvp) != DATA_TYPE_BOOLEAN) {
4268			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4269			    "encryption property '%s' cannot "
4270			    "be set for incremental streams."), name);
4271			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4272			goto error;
4273		}
4274
4275		switch (nvpair_type(nvp)) {
4276		case DATA_TYPE_BOOLEAN: /* -x property */
4277			/*
4278			 * DATA_TYPE_BOOLEAN is the way we're asked to "exclude"
4279			 * a property: this is done by forcing an explicit
4280			 * inherit on the destination so the effective value is
4281			 * not the one we received from the send stream.
4282			 */
4283			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4284			    !zfs_prop_user(name)) {
4285				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4286				    "Warning: %s: property '%s' does not "
4287				    "apply to datasets of this type\n"),
4288				    fsname, name);
4289				continue;
4290			}
4291			/*
4292			 * We do this only if the property is not already
4293			 * locally-set, in which case its value will take
4294			 * priority over the received anyway.
4295			 */
4296			if (nvlist_exists(origprops, newname)) {
4297				nvlist_t *attrs;
4298				const char *source = NULL;
4299
4300				attrs = fnvlist_lookup_nvlist(origprops,
4301				    newname);
4302				if (nvlist_lookup_string(attrs,
4303				    ZPROP_SOURCE, &source) == 0 &&
4304				    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
4305					continue;
4306			}
4307			/*
4308			 * We can't force an explicit inherit on non-inheritable
4309			 * properties: if we're asked to exclude this kind of
4310			 * values we remove them from "recvprops" input nvlist.
4311			 */
4312			if (!zfs_prop_user(name) && /* can be inherited too */
4313			    !zfs_prop_inheritable(prop) &&
4314			    nvlist_exists(recvprops, newname))
4315				fnvlist_remove(recvprops, newname);
4316			else
4317				fnvlist_add_boolean(*oxprops, newname);
4318			break;
4319		case DATA_TYPE_STRING: /* -o property=value */
4320			/*
4321			 * we're trying to override a property that does not
4322			 * make sense for this type of dataset, but we don't
4323			 * want to fail if the receive is recursive: this comes
4324			 * in handy when the send stream contains, for
4325			 * instance, a child ZVOL and we're trying to receive
4326			 * it with "-o atime=on"
4327			 */
4328			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4329			    !zfs_prop_user(name)) {
4330				if (recursive)
4331					continue;
4332				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4333				    "property '%s' does not apply to datasets "
4334				    "of this type"), name);
4335				ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4336				goto error;
4337			}
4338			fnvlist_add_string(oprops, newname,
4339			    fnvpair_value_string(nvp));
4340			break;
4341		default:
4342			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4343			    "property '%s' must be a string or boolean"), name);
4344			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4345			goto error;
4346		}
4347	}
4348
4349	if (toplevel) {
4350		/* convert override strings properties to native */
4351		if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
4352		    oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
4353			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4354			goto error;
4355		}
4356
4357		/*
4358		 * zfs_crypto_create() requires the parent name. Get it
4359		 * by truncating the fsname copy stored in namebuf.
4360		 */
4361		cp = strrchr(namebuf, '/');
4362		if (cp != NULL)
4363			*cp = '\0';
4364
4365		if (!raw && !(!newfs && recursive) &&
4366		    zfs_crypto_create(hdl, namebuf, voprops, NULL,
4367		    B_FALSE, wkeydata_out, wkeylen_out) != 0) {
4368			fnvlist_free(voprops);
4369			ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4370			goto error;
4371		}
4372
4373		/* second pass: process "-o" properties */
4374		fnvlist_merge(*oxprops, voprops);
4375		fnvlist_free(voprops);
4376	} else {
4377		/* override props on child dataset are inherited */
4378		nvp = NULL;
4379		while ((nvp = nvlist_next_nvpair(oprops, nvp)) != NULL) {
4380			const char *name = nvpair_name(nvp);
4381			fnvlist_add_boolean(*oxprops, name);
4382		}
4383	}
4384
4385error:
4386	if (zhp != NULL)
4387		zfs_close(zhp);
4388	if (zpool_hdl != NULL)
4389		zpool_close(zpool_hdl);
4390	fnvlist_free(oprops);
4391	return (ret);
4392}
4393
4394/*
4395 * Restores a backup of tosnap from the file descriptor specified by infd.
4396 */
4397static int
4398zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
4399    const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
4400    dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
4401    avl_tree_t *stream_avl, char **top_zfs,
4402    const char *finalsnap, nvlist_t *cmdprops)
4403{
4404	struct timespec begin_time;
4405	int ioctl_err, ioctl_errno, err;
4406	char *cp;
4407	struct drr_begin *drrb = &drr->drr_u.drr_begin;
4408	char errbuf[ERRBUFLEN];
4409	const char *chopprefix;
4410	boolean_t newfs = B_FALSE;
4411	boolean_t stream_wantsnewfs, stream_resumingnewfs;
4412	boolean_t newprops = B_FALSE;
4413	uint64_t read_bytes = 0;
4414	uint64_t errflags = 0;
4415	uint64_t parent_snapguid = 0;
4416	prop_changelist_t *clp = NULL;
4417	nvlist_t *snapprops_nvlist = NULL;
4418	nvlist_t *snapholds_nvlist = NULL;
4419	zprop_errflags_t prop_errflags;
4420	nvlist_t *prop_errors = NULL;
4421	boolean_t recursive;
4422	const char *snapname = NULL;
4423	char destsnap[MAXPATHLEN * 2];
4424	char origin[MAXNAMELEN] = {0};
4425	char name[MAXPATHLEN];
4426	char tmp_keylocation[MAXNAMELEN] = {0};
4427	nvlist_t *rcvprops = NULL; /* props received from the send stream */
4428	nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
4429	nvlist_t *origprops = NULL; /* original props (if destination exists) */
4430	zfs_type_t type = ZFS_TYPE_INVALID;
4431	boolean_t toplevel = B_FALSE;
4432	boolean_t zoned = B_FALSE;
4433	boolean_t hastoken = B_FALSE;
4434	boolean_t redacted;
4435	uint8_t *wkeydata = NULL;
4436	uint_t wkeylen = 0;
4437
4438#ifndef CLOCK_MONOTONIC_RAW
4439#define	CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
4440#endif
4441	clock_gettime(CLOCK_MONOTONIC_RAW, &begin_time);
4442
4443	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4444	    "cannot receive"));
4445
4446	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
4447	    ENOENT);
4448
4449	/* Did the user request holds be skipped via zfs recv -k? */
4450	boolean_t holds = flags->holds && !flags->skipholds;
4451
4452	if (stream_avl != NULL) {
4453		const char *keylocation = NULL;
4454		nvlist_t *lookup = NULL;
4455		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
4456		    &snapname);
4457
4458		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
4459		    &parent_snapguid);
4460		err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
4461		if (err) {
4462			rcvprops = fnvlist_alloc();
4463			newprops = B_TRUE;
4464		}
4465
4466		/*
4467		 * The keylocation property may only be set on encryption roots,
4468		 * but this dataset might not become an encryption root until
4469		 * recv_fix_encryption_hierarchy() is called. That function
4470		 * will fixup the keylocation anyway, so we temporarily unset
4471		 * the keylocation for now to avoid any errors from the receive
4472		 * ioctl.
4473		 */
4474		err = nvlist_lookup_string(rcvprops,
4475		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
4476		if (err == 0) {
4477			strlcpy(tmp_keylocation, keylocation, MAXNAMELEN);
4478			(void) nvlist_remove_all(rcvprops,
4479			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
4480		}
4481
4482		if (flags->canmountoff) {
4483			fnvlist_add_uint64(rcvprops,
4484			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0);
4485		} else if (newprops) {	/* nothing in rcvprops, eliminate it */
4486			fnvlist_free(rcvprops);
4487			rcvprops = NULL;
4488			newprops = B_FALSE;
4489		}
4490		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
4491			snapprops_nvlist = fnvlist_lookup_nvlist(lookup,
4492			    snapname);
4493		}
4494		if (holds) {
4495			if (0 == nvlist_lookup_nvlist(fs, "snapholds",
4496			    &lookup)) {
4497				snapholds_nvlist = fnvlist_lookup_nvlist(
4498				    lookup, snapname);
4499			}
4500		}
4501	}
4502
4503	cp = NULL;
4504
4505	/*
4506	 * Determine how much of the snapshot name stored in the stream
4507	 * we are going to tack on to the name they specified on the
4508	 * command line, and how much we are going to chop off.
4509	 *
4510	 * If they specified a snapshot, chop the entire name stored in
4511	 * the stream.
4512	 */
4513	if (flags->istail) {
4514		/*
4515		 * A filesystem was specified with -e. We want to tack on only
4516		 * the tail of the sent snapshot path.
4517		 */
4518		if (strchr(tosnap, '@')) {
4519			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4520			    "argument - snapshot not allowed with -e"));
4521			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4522			goto out;
4523		}
4524
4525		chopprefix = strrchr(sendfs, '/');
4526
4527		if (chopprefix == NULL) {
4528			/*
4529			 * The tail is the poolname, so we need to
4530			 * prepend a path separator.
4531			 */
4532			int len = strlen(drrb->drr_toname);
4533			cp = umem_alloc(len + 2, UMEM_NOFAIL);
4534			cp[0] = '/';
4535			(void) strcpy(&cp[1], drrb->drr_toname);
4536			chopprefix = cp;
4537		} else {
4538			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
4539		}
4540	} else if (flags->isprefix) {
4541		/*
4542		 * A filesystem was specified with -d. We want to tack on
4543		 * everything but the first element of the sent snapshot path
4544		 * (all but the pool name).
4545		 */
4546		if (strchr(tosnap, '@')) {
4547			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4548			    "argument - snapshot not allowed with -d"));
4549			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4550			goto out;
4551		}
4552
4553		chopprefix = strchr(drrb->drr_toname, '/');
4554		if (chopprefix == NULL)
4555			chopprefix = strchr(drrb->drr_toname, '@');
4556	} else if (strchr(tosnap, '@') == NULL) {
4557		/*
4558		 * If a filesystem was specified without -d or -e, we want to
4559		 * tack on everything after the fs specified by 'zfs send'.
4560		 */
4561		chopprefix = drrb->drr_toname + strlen(sendfs);
4562	} else {
4563		/* A snapshot was specified as an exact path (no -d or -e). */
4564		if (recursive) {
4565			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4566			    "cannot specify snapshot name for multi-snapshot "
4567			    "stream"));
4568			err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4569			goto out;
4570		}
4571		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
4572	}
4573
4574	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
4575	ASSERT(chopprefix > drrb->drr_toname || strchr(sendfs, '/') == NULL);
4576	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname) ||
4577	    strchr(sendfs, '/') == NULL);
4578	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
4579	    chopprefix[0] == '\0');
4580
4581	/*
4582	 * Determine name of destination snapshot.
4583	 */
4584	(void) strlcpy(destsnap, tosnap, sizeof (destsnap));
4585	(void) strlcat(destsnap, chopprefix, sizeof (destsnap));
4586	if (cp != NULL)
4587		umem_free(cp, strlen(cp) + 1);
4588	if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
4589		err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4590		goto out;
4591	}
4592
4593	/*
4594	 * Determine the name of the origin snapshot.
4595	 */
4596	if (originsnap) {
4597		(void) strlcpy(origin, originsnap, sizeof (origin));
4598		if (flags->verbose)
4599			(void) printf("using provided clone origin %s\n",
4600			    origin);
4601	} else if (drrb->drr_flags & DRR_FLAG_CLONE) {
4602		if (guid_to_name(hdl, destsnap,
4603		    drrb->drr_fromguid, B_FALSE, origin) != 0) {
4604			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4605			    "local origin for clone %s does not exist"),
4606			    destsnap);
4607			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4608			goto out;
4609		}
4610		if (flags->verbose)
4611			(void) printf("found clone origin %s\n", origin);
4612	}
4613
4614	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4615	    DMU_BACKUP_FEATURE_DEDUP)) {
4616		(void) fprintf(stderr,
4617		    gettext("ERROR: \"zfs receive\" no longer supports "
4618		    "deduplicated send streams.  Use\n"
4619		    "the \"zstream redup\" command to convert this stream "
4620		    "to a regular,\n"
4621		    "non-deduplicated stream.\n"));
4622		err = zfs_error(hdl, EZFS_NOTSUP, errbuf);
4623		goto out;
4624	}
4625
4626	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4627	    DMU_BACKUP_FEATURE_RESUMING;
4628	boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4629	    DMU_BACKUP_FEATURE_RAW;
4630	boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4631	    DMU_BACKUP_FEATURE_EMBED_DATA;
4632	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
4633	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
4634	stream_resumingnewfs = (drrb->drr_fromguid == 0 ||
4635	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && resuming;
4636
4637	if (stream_wantsnewfs) {
4638		/*
4639		 * if the parent fs does not exist, look for it based on
4640		 * the parent snap GUID
4641		 */
4642		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4643		    "cannot receive new filesystem stream"));
4644
4645		(void) strlcpy(name, destsnap, sizeof (name));
4646		cp = strrchr(name, '/');
4647		if (cp)
4648			*cp = '\0';
4649		if (cp &&
4650		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4651			char suffix[ZFS_MAX_DATASET_NAME_LEN];
4652			(void) strlcpy(suffix, strrchr(destsnap, '/'),
4653			    sizeof (suffix));
4654			if (guid_to_name(hdl, name, parent_snapguid,
4655			    B_FALSE, destsnap) == 0) {
4656				*strchr(destsnap, '@') = '\0';
4657				(void) strlcat(destsnap, suffix,
4658				    sizeof (destsnap));
4659			}
4660		}
4661	} else {
4662		/*
4663		 * If the fs does not exist, look for it based on the
4664		 * fromsnap GUID.
4665		 */
4666		if (resuming) {
4667			(void) snprintf(errbuf, sizeof (errbuf),
4668			    dgettext(TEXT_DOMAIN,
4669			    "cannot receive resume stream"));
4670		} else {
4671			(void) snprintf(errbuf, sizeof (errbuf),
4672			    dgettext(TEXT_DOMAIN,
4673			    "cannot receive incremental stream"));
4674		}
4675
4676		(void) strlcpy(name, destsnap, sizeof (name));
4677		*strchr(name, '@') = '\0';
4678
4679		/*
4680		 * If the exact receive path was specified and this is the
4681		 * topmost path in the stream, then if the fs does not exist we
4682		 * should look no further.
4683		 */
4684		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
4685		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
4686		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4687			char snap[ZFS_MAX_DATASET_NAME_LEN];
4688			(void) strlcpy(snap, strchr(destsnap, '@'),
4689			    sizeof (snap));
4690			if (guid_to_name(hdl, name, drrb->drr_fromguid,
4691			    B_FALSE, destsnap) == 0) {
4692				*strchr(destsnap, '@') = '\0';
4693				(void) strlcat(destsnap, snap,
4694				    sizeof (destsnap));
4695			}
4696		}
4697	}
4698
4699	(void) strlcpy(name, destsnap, sizeof (name));
4700	*strchr(name, '@') = '\0';
4701
4702	redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4703	    DMU_BACKUP_FEATURE_REDACTED;
4704
4705	if (flags->heal) {
4706		if (flags->isprefix || flags->istail || flags->force ||
4707		    flags->canmountoff || flags->resumable || flags->nomount ||
4708		    flags->skipholds) {
4709			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4710			    "corrective recv can not be used when combined with"
4711			    " this flag"));
4712			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4713			goto out;
4714		}
4715		uint64_t guid =
4716		    get_snap_guid(hdl, name, strchr(destsnap, '@') + 1);
4717		if (guid == 0) {
4718			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4719			    "corrective recv must specify an existing snapshot"
4720			    " to heal"));
4721			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4722			goto out;
4723		} else if (guid != drrb->drr_toguid) {
4724			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4725			    "local snapshot doesn't match the snapshot"
4726			    " in the provided stream"));
4727			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4728			goto out;
4729		}
4730	} else if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4731		zfs_cmd_t zc = {"\0"};
4732		zfs_handle_t *zhp = NULL;
4733		boolean_t encrypted;
4734
4735		(void) strcpy(zc.zc_name, name);
4736
4737		/*
4738		 * Destination fs exists.  It must be one of these cases:
4739		 *  - an incremental send stream
4740		 *  - the stream specifies a new fs (full stream or clone)
4741		 *    and they want us to blow away the existing fs (and
4742		 *    have therefore specified -F and removed any snapshots)
4743		 *  - we are resuming a failed receive.
4744		 */
4745		if (stream_wantsnewfs) {
4746			boolean_t is_volume = drrb->drr_type == DMU_OST_ZVOL;
4747			if (!flags->force) {
4748				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4749				    "destination '%s' exists\n"
4750				    "must specify -F to overwrite it"), name);
4751				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4752				goto out;
4753			}
4754			if (zfs_ioctl(hdl, ZFS_IOC_SNAPSHOT_LIST_NEXT,
4755			    &zc) == 0) {
4756				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4757				    "destination has snapshots (eg. %s)\n"
4758				    "must destroy them to overwrite it"),
4759				    zc.zc_name);
4760				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4761				goto out;
4762			}
4763			if (is_volume && strrchr(name, '/') == NULL) {
4764				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4765				    "destination %s is the root dataset\n"
4766				    "cannot overwrite with a ZVOL"),
4767				    name);
4768				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4769				goto out;
4770			}
4771			if (is_volume &&
4772			    zfs_ioctl(hdl, ZFS_IOC_DATASET_LIST_NEXT,
4773			    &zc) == 0) {
4774				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4775				    "destination has children (eg. %s)\n"
4776				    "cannot overwrite with a ZVOL"),
4777				    zc.zc_name);
4778				err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4779				goto out;
4780			}
4781		}
4782
4783		if ((zhp = zfs_open(hdl, name,
4784		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
4785			err = -1;
4786			goto out;
4787		}
4788
4789		/*
4790		 * When receiving full/newfs on existing dataset, then it
4791		 * should be done with "-F" flag. Its enforced for initial
4792		 * receive in previous checks in this function.
4793		 * Similarly, on resuming full/newfs recv on existing dataset,
4794		 * it should be done with "-F" flag.
4795		 *
4796		 * When dataset doesn't exist, then full/newfs recv is done on
4797		 * newly created dataset and it's marked INCONSISTENT. But
4798		 * When receiving on existing dataset, recv is first done on
4799		 * %recv and its marked INCONSISTENT. Existing dataset is not
4800		 * marked INCONSISTENT.
4801		 * Resume of full/newfs receive with dataset not INCONSISTENT
4802		 * indicates that its resuming newfs on existing dataset. So,
4803		 * enforce "-F" flag in this case.
4804		 */
4805		if (stream_resumingnewfs &&
4806		    !zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
4807		    !flags->force) {
4808			zfs_close(zhp);
4809			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4810			    "Resuming recv on existing destination '%s'\n"
4811			    "must specify -F to overwrite it"), name);
4812			err = zfs_error(hdl, EZFS_RESUME_EXISTS, errbuf);
4813			goto out;
4814		}
4815
4816		if (stream_wantsnewfs &&
4817		    zhp->zfs_dmustats.dds_origin[0]) {
4818			zfs_close(zhp);
4819			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4820			    "destination '%s' is a clone\n"
4821			    "must destroy it to overwrite it"), name);
4822			err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4823			goto out;
4824		}
4825
4826		/*
4827		 * Raw sends can not be performed as an incremental on top
4828		 * of existing unencrypted datasets. zfs recv -F can't be
4829		 * used to blow away an existing encrypted filesystem. This
4830		 * is because it would require the dsl dir to point to the
4831		 * new key (or lack of a key) and the old key at the same
4832		 * time. The -F flag may still be used for deleting
4833		 * intermediate snapshots that would otherwise prevent the
4834		 * receive from working.
4835		 */
4836		encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
4837		    ZIO_CRYPT_OFF;
4838		if (!stream_wantsnewfs && !encrypted && raw) {
4839			zfs_close(zhp);
4840			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4841			    "cannot perform raw receive on top of "
4842			    "existing unencrypted dataset"));
4843			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4844			goto out;
4845		}
4846
4847		if (stream_wantsnewfs && flags->force &&
4848		    ((raw && !encrypted) || encrypted)) {
4849			zfs_close(zhp);
4850			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4851			    "zfs receive -F cannot be used to destroy an "
4852			    "encrypted filesystem or overwrite an "
4853			    "unencrypted one with an encrypted one"));
4854			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4855			goto out;
4856		}
4857
4858		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
4859		    (stream_wantsnewfs || stream_resumingnewfs)) {
4860			/* We can't do online recv in this case */
4861			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
4862			    flags->forceunmount ? MS_FORCE : 0);
4863			if (clp == NULL) {
4864				zfs_close(zhp);
4865				err = -1;
4866				goto out;
4867			}
4868			if (changelist_prefix(clp) != 0) {
4869				changelist_free(clp);
4870				zfs_close(zhp);
4871				err = -1;
4872				goto out;
4873			}
4874		}
4875
4876		/*
4877		 * If we are resuming a newfs, set newfs here so that we will
4878		 * mount it if the recv succeeds this time.  We can tell
4879		 * that it was a newfs on the first recv because the fs
4880		 * itself will be inconsistent (if the fs existed when we
4881		 * did the first recv, we would have received it into
4882		 * .../%recv).
4883		 */
4884		if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
4885			newfs = B_TRUE;
4886
4887		/* we want to know if we're zoned when validating -o|-x props */
4888		zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
4889
4890		/* may need this info later, get it now we have zhp around */
4891		if (zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0,
4892		    NULL, NULL, 0, B_TRUE) == 0)
4893			hastoken = B_TRUE;
4894
4895		/* gather existing properties on destination */
4896		origprops = fnvlist_alloc();
4897		fnvlist_merge(origprops, zhp->zfs_props);
4898		fnvlist_merge(origprops, zhp->zfs_user_props);
4899
4900		zfs_close(zhp);
4901	} else {
4902		zfs_handle_t *zhp;
4903
4904		/*
4905		 * Destination filesystem does not exist.  Therefore we better
4906		 * be creating a new filesystem (either from a full backup, or
4907		 * a clone).  It would therefore be invalid if the user
4908		 * specified only the pool name (i.e. if the destination name
4909		 * contained no slash character).
4910		 */
4911		cp = strrchr(name, '/');
4912
4913		if (!stream_wantsnewfs || cp == NULL) {
4914			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4915			    "destination '%s' does not exist"), name);
4916			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4917			goto out;
4918		}
4919
4920		/*
4921		 * Trim off the final dataset component so we perform the
4922		 * recvbackup ioctl to the filesystems's parent.
4923		 */
4924		*cp = '\0';
4925
4926		if (flags->isprefix && !flags->istail && !flags->dryrun &&
4927		    create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
4928			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4929			goto out;
4930		}
4931
4932		/* validate parent */
4933		zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
4934		if (zhp == NULL) {
4935			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4936			goto out;
4937		}
4938		if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
4939			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4940			    "parent '%s' is not a filesystem"), name);
4941			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4942			zfs_close(zhp);
4943			goto out;
4944		}
4945
4946		zfs_close(zhp);
4947
4948		newfs = B_TRUE;
4949		*cp = '/';
4950	}
4951
4952	if (flags->verbose) {
4953		(void) printf("%s %s%s stream of %s into %s\n",
4954		    flags->dryrun ? "would receive" : "receiving",
4955		    flags->heal ? " corrective" : "",
4956		    drrb->drr_fromguid ? "incremental" : "full",
4957		    drrb->drr_toname, destsnap);
4958		(void) fflush(stdout);
4959	}
4960
4961	/*
4962	 * If this is the top-level dataset, record it so we can use it
4963	 * for recursive operations later.
4964	 */
4965	if (top_zfs != NULL &&
4966	    (*top_zfs == NULL || strcmp(*top_zfs, name) == 0)) {
4967		toplevel = B_TRUE;
4968		if (*top_zfs == NULL)
4969			*top_zfs = zfs_strdup(hdl, name);
4970	}
4971
4972	if (drrb->drr_type == DMU_OST_ZVOL) {
4973		type = ZFS_TYPE_VOLUME;
4974	} else if (drrb->drr_type == DMU_OST_ZFS) {
4975		type = ZFS_TYPE_FILESYSTEM;
4976	} else {
4977		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4978		    "invalid record type: 0x%d"), drrb->drr_type);
4979		err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4980		goto out;
4981	}
4982	if ((err = zfs_setup_cmdline_props(hdl, type, name, zoned, recursive,
4983	    stream_wantsnewfs, raw, toplevel, rcvprops, cmdprops, origprops,
4984	    &oxprops, &wkeydata, &wkeylen, errbuf)) != 0)
4985		goto out;
4986
4987	/*
4988	 * When sending with properties (zfs send -p), the encryption property
4989	 * is not included because it is a SETONCE property and therefore
4990	 * treated as read only. However, we are always able to determine its
	 * value because raw sends will include it in the DRR_BEGIN payload
4992	 * and non-raw sends with properties are not allowed for encrypted
4993	 * datasets. Therefore, if this is a non-raw properties stream, we can
4994	 * infer that the value should be ZIO_CRYPT_OFF and manually add that
4995	 * to the received properties.
4996	 */
4997	if (stream_wantsnewfs && !raw && rcvprops != NULL &&
4998	    !nvlist_exists(cmdprops, zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) {
4999		if (oxprops == NULL)
5000			oxprops = fnvlist_alloc();
5001		fnvlist_add_uint64(oxprops,
5002		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF);
5003	}
5004
5005	if (flags->dryrun) {
5006		void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
5007
5008		/*
5009		 * We have read the DRR_BEGIN record, but we have
5010		 * not yet read the payload. For non-dryrun sends
5011		 * this will be done by the kernel, so we must
5012		 * emulate that here, before attempting to read
5013		 * more records.
5014		 */
5015		err = recv_read(hdl, infd, buf, drr->drr_payloadlen,
5016		    flags->byteswap, NULL);
5017		free(buf);
5018		if (err != 0)
5019			goto out;
5020
5021		err = recv_skip(hdl, infd, flags->byteswap);
5022		goto out;
5023	}
5024
5025	if (flags->heal) {
5026		err = ioctl_err = lzc_receive_with_heal(destsnap, rcvprops,
5027		    oxprops, wkeydata, wkeylen, origin, flags->force,
5028		    flags->heal, flags->resumable, raw, infd, drr_noswap, -1,
5029		    &read_bytes, &errflags, NULL, &prop_errors);
5030	} else {
5031		err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
5032		    oxprops, wkeydata, wkeylen, origin, flags->force,
5033		    flags->resumable, raw, infd, drr_noswap, -1, &read_bytes,
5034		    &errflags, NULL, &prop_errors);
5035	}
5036	ioctl_errno = ioctl_err;
5037	prop_errflags = errflags;
5038
5039	if (err == 0) {
5040		nvpair_t *prop_err = NULL;
5041
5042		while ((prop_err = nvlist_next_nvpair(prop_errors,
5043		    prop_err)) != NULL) {
5044			char tbuf[1024];
5045			zfs_prop_t prop;
5046			int intval;
5047
5048			prop = zfs_name_to_prop(nvpair_name(prop_err));
5049			(void) nvpair_value_int32(prop_err, &intval);
5050			if (strcmp(nvpair_name(prop_err),
5051			    ZPROP_N_MORE_ERRORS) == 0) {
5052				trunc_prop_errs(intval);
5053				break;
5054			} else if (snapname == NULL || finalsnap == NULL ||
5055			    strcmp(finalsnap, snapname) == 0 ||
5056			    strcmp(nvpair_name(prop_err),
5057			    zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
5058				/*
5059				 * Skip the special case of, for example,
5060				 * "refquota", errors on intermediate
5061				 * snapshots leading up to a final one.
5062				 * That's why we have all of the checks above.
5063				 *
5064				 * See zfs_ioctl.c's extract_delay_props() for
5065				 * a list of props which can fail on
5066				 * intermediate snapshots, but shouldn't
5067				 * affect the overall receive.
5068				 */
5069				(void) snprintf(tbuf, sizeof (tbuf),
5070				    dgettext(TEXT_DOMAIN,
5071				    "cannot receive %s property on %s"),
5072				    nvpair_name(prop_err), name);
5073				zfs_setprop_error(hdl, prop, intval, tbuf);
5074			}
5075		}
5076	}
5077
5078	if (err == 0 && snapprops_nvlist) {
5079		zfs_cmd_t zc = {"\0"};
5080
5081		(void) strlcpy(zc.zc_name, destsnap, sizeof (zc.zc_name));
5082		zc.zc_cookie = B_TRUE; /* received */
5083		zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist);
5084		(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
5085		zcmd_free_nvlists(&zc);
5086	}
5087	if (err == 0 && snapholds_nvlist) {
5088		nvpair_t *pair;
5089		nvlist_t *holds, *errors = NULL;
5090		int cleanup_fd = -1;
5091
5092		VERIFY(0 == nvlist_alloc(&holds, 0, KM_SLEEP));
5093		for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL);
5094		    pair != NULL;
5095		    pair = nvlist_next_nvpair(snapholds_nvlist, pair)) {
5096			fnvlist_add_string(holds, destsnap, nvpair_name(pair));
5097		}
5098		(void) lzc_hold(holds, cleanup_fd, &errors);
5099		fnvlist_free(snapholds_nvlist);
5100		fnvlist_free(holds);
5101	}
5102
5103	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
5104		/*
5105		 * It may be that this snapshot already exists,
5106		 * in which case we want to consume & ignore it
5107		 * rather than failing.
5108		 */
5109		avl_tree_t *local_avl;
5110		nvlist_t *local_nv, *fs;
5111		cp = strchr(destsnap, '@');
5112
5113		/*
5114		 * XXX Do this faster by just iterating over snaps in
5115		 * this fs.  Also if zc_value does not exist, we will
5116		 * get a strange "does not exist" error message.
5117		 */
5118		*cp = '\0';
5119		if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
5120		    B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE,
5121		    B_TRUE, &local_nv, &local_avl) == 0) {
5122			*cp = '@';
5123			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
5124			fsavl_destroy(local_avl);
5125			fnvlist_free(local_nv);
5126
5127			if (fs != NULL) {
5128				if (flags->verbose) {
5129					(void) printf("snap %s already exists; "
5130					    "ignoring\n", destsnap);
5131				}
5132				err = ioctl_err = recv_skip(hdl, infd,
5133				    flags->byteswap);
5134			}
5135		}
5136		*cp = '@';
5137	}
5138
5139	if (ioctl_err != 0) {
5140		switch (ioctl_errno) {
5141		case ENODEV:
5142			cp = strchr(destsnap, '@');
5143			*cp = '\0';
5144			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5145			    "most recent snapshot of %s does not\n"
5146			    "match incremental source"), destsnap);
5147			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
5148			*cp = '@';
5149			break;
5150		case ETXTBSY:
5151			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5152			    "destination %s has been modified\n"
5153			    "since most recent snapshot"), name);
5154			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
5155			break;
5156		case EACCES:
5157			if (flags->heal) {
5158				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5159				    "key must be loaded to do a non-raw "
5160				    "corrective recv on an encrypted "
5161				    "dataset."));
5162			} else if (raw && stream_wantsnewfs) {
5163				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5164				    "failed to create encryption key"));
5165			} else if (raw && !stream_wantsnewfs) {
5166				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5167				    "encryption key does not match "
5168				    "existing key"));
5169			} else {
5170				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5171				    "inherited key must be loaded"));
5172			}
5173			(void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
5174			break;
5175		case EEXIST:
5176			cp = strchr(destsnap, '@');
5177			if (newfs) {
5178				/* it's the containing fs that exists */
5179				*cp = '\0';
5180			}
5181			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5182			    "destination already exists"));
5183			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
5184			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
5185			    destsnap);
5186			*cp = '@';
5187			break;
5188		case EINVAL:
5189			if (embedded && !raw) {
5190				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5191				    "incompatible embedded data stream "
5192				    "feature with encrypted receive."));
5193			} else if (flags->resumable) {
5194				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5195				    "kernel modules must be upgraded to "
5196				    "receive this stream."));
5197			}
5198			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5199			break;
5200		case ECKSUM:
5201		case ZFS_ERR_STREAM_TRUNCATED:
5202			if (flags->heal)
5203				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5204				    "corrective receive was not able to "
5205				    "reconstruct the data needed for "
5206				    "healing."));
5207			else
5208				recv_ecksum_set_aux(hdl, destsnap,
5209				    flags->resumable, ioctl_err == ECKSUM);
5210			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5211			break;
5212		case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
5213			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5214			    "incremental send stream requires -L "
5215			    "(--large-block), to match previous receive."));
5216			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5217			break;
5218		case ENOTSUP:
5219			if (flags->heal)
5220				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5221				    "stream is not compatible with the "
5222				    "data in the pool."));
5223			else
5224				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5225				    "pool must be upgraded to receive this "
5226				    "stream."));
5227			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
5228			break;
5229		case ZFS_ERR_CRYPTO_NOTSUP:
5230			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5231			    "stream uses crypto parameters not compatible with "
5232			    "this pool"));
5233			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5234			break;
5235		case EDQUOT:
5236			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5237			    "destination %s space quota exceeded."), name);
5238			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
5239			break;
5240		case ZFS_ERR_FROM_IVSET_GUID_MISSING:
5241			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5242			    "IV set guid missing. See errata %u at "
5243			    "https://openzfs.github.io/openzfs-docs/msg/"
5244			    "ZFS-8000-ER."),
5245			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION);
5246			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5247			break;
5248		case ZFS_ERR_FROM_IVSET_GUID_MISMATCH:
5249			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5250			    "IV set guid mismatch. See the 'zfs receive' "
5251			    "man page section\n discussing the limitations "
5252			    "of raw encrypted send streams."));
5253			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5254			break;
5255		case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
5256			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5257			    "Spill block flag missing for raw send.\n"
5258			    "The zfs software on the sending system must "
5259			    "be updated."));
5260			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5261			break;
5262		case ZFS_ERR_RESUME_EXISTS:
5263			cp = strchr(destsnap, '@');
5264			if (newfs) {
5265				/* it's the containing fs that exists */
5266				*cp = '\0';
5267			}
5268			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5269			    "Resuming recv on existing dataset without force"));
5270			(void) zfs_error_fmt(hdl, EZFS_RESUME_EXISTS,
5271			    dgettext(TEXT_DOMAIN, "cannot resume recv %s"),
5272			    destsnap);
5273			*cp = '@';
5274			break;
5275		case E2BIG:
5276			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5277			    "zfs receive required kernel memory allocation "
5278			    "larger than the system can support. Please file "
5279			    "an issue at the OpenZFS issue tracker:\n"
5280			    "https://github.com/openzfs/zfs/issues/new"));
5281			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5282			break;
5283		case EBUSY:
5284			if (hastoken) {
5285				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5286				    "destination %s contains "
5287				    "partially-complete state from "
5288				    "\"zfs receive -s\"."), name);
5289				(void) zfs_error(hdl, EZFS_BUSY, errbuf);
5290				break;
5291			}
5292			zfs_fallthrough;
5293		default:
5294			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
5295		}
5296	}
5297
5298	/*
5299	 * Mount the target filesystem (if created).  Also mount any
5300	 * children of the target filesystem if we did a replication
5301	 * receive (indicated by stream_avl being non-NULL).
5302	 */
5303	if (clp) {
5304		if (!flags->nomount)
5305			err |= changelist_postfix(clp);
5306		changelist_free(clp);
5307	}
5308
5309	if ((newfs || stream_avl) && type == ZFS_TYPE_FILESYSTEM && !redacted)
5310		flags->domount = B_TRUE;
5311
5312	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
5313		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5314		    "failed to clear unreceived properties on %s"), name);
5315		(void) fprintf(stderr, "\n");
5316	}
5317	if (prop_errflags & ZPROP_ERR_NORESTORE) {
5318		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5319		    "failed to restore original properties on %s"), name);
5320		(void) fprintf(stderr, "\n");
5321	}
5322
5323	if (err || ioctl_err) {
5324		err = -1;
5325		goto out;
5326	}
5327
5328	if (flags->verbose) {
5329		char buf1[64];
5330		char buf2[64];
5331		uint64_t bytes = read_bytes;
5332		struct timespec delta;
5333		clock_gettime(CLOCK_MONOTONIC_RAW, &delta);
5334		if (begin_time.tv_nsec > delta.tv_nsec) {
5335			delta.tv_nsec =
5336			    1000000000 + delta.tv_nsec - begin_time.tv_nsec;
5337			delta.tv_sec -= 1;
5338		} else
5339			delta.tv_nsec -= begin_time.tv_nsec;
5340		delta.tv_sec -= begin_time.tv_sec;
5341		if (delta.tv_sec == 0 && delta.tv_nsec == 0)
5342			delta.tv_nsec = 1;
5343		double delta_f = delta.tv_sec + (delta.tv_nsec / 1e9);
5344		zfs_nicebytes(bytes, buf1, sizeof (buf1));
5345		zfs_nicebytes(bytes / delta_f, buf2, sizeof (buf2));
5346
5347		(void) printf("received %s stream in %.2f seconds (%s/sec)\n",
5348		    buf1, delta_f, buf2);
5349	}
5350
5351	err = 0;
5352out:
5353	if (prop_errors != NULL)
5354		fnvlist_free(prop_errors);
5355
5356	if (tmp_keylocation[0] != '\0') {
5357		fnvlist_add_string(rcvprops,
5358		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation);
5359	}
5360
5361	if (newprops)
5362		fnvlist_free(rcvprops);
5363
5364	fnvlist_free(oxprops);
5365	fnvlist_free(origprops);
5366
5367	return (err);
5368}
5369
5370/*
5371 * Check properties we were asked to override (both -o|-x)
5372 */
5373static boolean_t
5374zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
5375    const char *errbuf)
5376{
5377	nvpair_t *nvp = NULL;
5378	zfs_prop_t prop;
5379	const char *name;
5380
5381	while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
5382		name = nvpair_name(nvp);
5383		prop = zfs_name_to_prop(name);
5384
5385		if (prop == ZPROP_USERPROP) {
5386			if (!zfs_prop_user(name)) {
5387				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5388				    "%s: invalid property '%s'"), errbuf, name);
5389				return (B_FALSE);
5390			}
5391			continue;
5392		}
5393		/*
5394		 * "origin" is readonly but is used to receive datasets as
5395		 * clones so we don't raise an error here
5396		 */
5397		if (prop == ZFS_PROP_ORIGIN)
5398			continue;
5399
5400		/* encryption params have their own verification later */
5401		if (prop == ZFS_PROP_ENCRYPTION ||
5402		    zfs_prop_encryption_key_param(prop))
5403			continue;
5404
5405		/*
5406		 * cannot override readonly, set-once and other specific
5407		 * settable properties
5408		 */
5409		if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION ||
5410		    prop == ZFS_PROP_VOLSIZE) {
5411			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5412			    "%s: invalid property '%s'"), errbuf, name);
5413			return (B_FALSE);
5414		}
5415	}
5416
5417	return (B_TRUE);
5418}
5419
5420static int
5421zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
5422    const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
5423    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs,
5424    const char *finalsnap, nvlist_t *cmdprops)
5425{
5426	int err;
5427	dmu_replay_record_t drr, drr_noswap;
5428	struct drr_begin *drrb = &drr.drr_u.drr_begin;
5429	char errbuf[ERRBUFLEN];
5430	zio_cksum_t zcksum = { { 0 } };
5431	uint64_t featureflags;
5432	int hdrtype;
5433
5434	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
5435	    "cannot receive"));
5436
5437	/* check cmdline props, raise an error if they cannot be received */
5438	if (!zfs_receive_checkprops(hdl, cmdprops, errbuf))
5439		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
5440
5441	if (flags->isprefix &&
5442	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
5443		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
5444		    "(%s) does not exist"), tosnap);
5445		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5446	}
5447	if (originsnap &&
5448	    !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
5449		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
5450		    "(%s) does not exist"), originsnap);
5451		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5452	}
5453
5454	/* read in the BEGIN record */
5455	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
5456	    &zcksum)))
5457		return (err);
5458
5459	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
5460		/* It's the double end record at the end of a package */
5461		return (ENODATA);
5462	}
5463
5464	/* the kernel needs the non-byteswapped begin record */
5465	drr_noswap = drr;
5466
5467	flags->byteswap = B_FALSE;
5468	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
5469		/*
5470		 * We computed the checksum in the wrong byteorder in
5471		 * recv_read() above; do it again correctly.
5472		 */
5473		memset(&zcksum, 0, sizeof (zio_cksum_t));
5474		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
5475		flags->byteswap = B_TRUE;
5476
5477		drr.drr_type = BSWAP_32(drr.drr_type);
5478		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
5479		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
5480		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
5481		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
5482		drrb->drr_type = BSWAP_32(drrb->drr_type);
5483		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
5484		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
5485		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
5486	}
5487
5488	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
5489		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5490		    "stream (bad magic number)"));
5491		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5492	}
5493
5494	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
5495	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
5496
5497	if (!DMU_STREAM_SUPPORTED(featureflags) ||
5498	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
5499		/*
5500		 * Let's be explicit about this one, since rather than
5501		 * being a new feature we can't know, it's an old
5502		 * feature we dropped.
5503		 */
5504		if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
5505			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5506			    "stream has deprecated feature: dedup, try "
5507			    "'zstream redup [send in a file] | zfs recv "
5508			    "[...]'"));
5509		} else {
5510			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5511			    "stream has unsupported feature, feature flags = "
5512			    "%llx (unknown flags = %llx)"),
5513			    (u_longlong_t)featureflags,
5514			    (u_longlong_t)((featureflags) &
5515			    ~DMU_BACKUP_FEATURE_MASK));
5516		}
5517		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5518	}
5519
5520	/* Holds feature is set once in the compound stream header. */
5521	if (featureflags & DMU_BACKUP_FEATURE_HOLDS)
5522		flags->holds = B_TRUE;
5523
5524	if (strchr(drrb->drr_toname, '@') == NULL) {
5525		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5526		    "stream (bad snapshot name)"));
5527		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5528	}
5529
5530	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
5531		char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
5532		if (sendfs == NULL) {
5533			/*
5534			 * We were not called from zfs_receive_package(). Get
5535			 * the fs specified by 'zfs send'.
5536			 */
5537			char *cp;
5538			(void) strlcpy(nonpackage_sendfs,
5539			    drr.drr_u.drr_begin.drr_toname,
5540			    sizeof (nonpackage_sendfs));
5541			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
5542				*cp = '\0';
5543			sendfs = nonpackage_sendfs;
5544			VERIFY(finalsnap == NULL);
5545		}
5546		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
5547		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
5548		    finalsnap, cmdprops));
5549	} else {
5550		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
5551		    DMU_COMPOUNDSTREAM);
5552		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
5553		    &zcksum, top_zfs, cmdprops));
5554	}
5555}
5556
5557/*
5558 * Restores a backup of tosnap from the file descriptor specified by infd.
5559 * Return 0 on total success, -2 if some things couldn't be
5560 * destroyed/renamed/promoted, -1 if some things couldn't be received.
5561 * (-1 will override -2, if -1 and the resumable flag was specified the
5562 * transfer can be resumed if the sending side supports it).
5563 */
5564int
5565zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
5566    recvflags_t *flags, int infd, avl_tree_t *stream_avl)
5567{
5568	char *top_zfs = NULL;
5569	int err;
5570	struct stat sb;
5571	const char *originsnap = NULL;
5572
5573	/*
5574	 * The only way fstat can fail is if we do not have a valid file
5575	 * descriptor.
5576	 */
5577	if (fstat(infd, &sb) == -1) {
5578		perror("fstat");
5579		return (-2);
5580	}
5581
5582	if (props) {
5583		err = nvlist_lookup_string(props, "origin", &originsnap);
5584		if (err && err != ENOENT)
5585			return (err);
5586	}
5587
5588	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
5589	    stream_avl, &top_zfs, NULL, props);
5590
5591	if (err == 0 && !flags->nomount && flags->domount && top_zfs) {
5592		zfs_handle_t *zhp = NULL;
5593		prop_changelist_t *clp = NULL;
5594
5595		zhp = zfs_open(hdl, top_zfs,
5596		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
5597		if (zhp == NULL) {
5598			err = -1;
5599			goto out;
5600		} else {
5601			if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
5602				zfs_close(zhp);
5603				goto out;
5604			}
5605
5606			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
5607			    CL_GATHER_MOUNT_ALWAYS,
5608			    flags->forceunmount ? MS_FORCE : 0);
5609			zfs_close(zhp);
5610			if (clp == NULL) {
5611				err = -1;
5612				goto out;
5613			}
5614
5615			/* mount and share received datasets */
5616			err = changelist_postfix(clp);
5617			changelist_free(clp);
5618			if (err != 0)
5619				err = -1;
5620		}
5621	}
5622
5623out:
5624	if (top_zfs)
5625		free(top_zfs);
5626
5627	return (err);
5628}
5629