1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32 * Copyright (c) 2019 Datto Inc.
33 */
34
35#include <assert.h>
36#include <ctype.h>
37#include <errno.h>
38#include <libintl.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <unistd.h>
43#include <stddef.h>
44#include <fcntl.h>
45#include <sys/mount.h>
46#include <sys/mntent.h>
47#include <sys/mnttab.h>
48#include <sys/avl.h>
49#include <sys/debug.h>
50#include <sys/stat.h>
51#include <pthread.h>
52#include <umem.h>
53#include <time.h>
54
55#include <libzfs.h>
56#include <libzfs_core.h>
57#include <libzutil.h>
58
59#include "zfs_namecheck.h"
60#include "zfs_prop.h"
61#include "zfs_fletcher.h"
62#include "libzfs_impl.h"
63#include <cityhash.h>
64#include <zlib.h>
65#include <sys/zio_checksum.h>
66#include <sys/dsl_crypt.h>
67#include <sys/ddt.h>
68#include <sys/socket.h>
69#include <sys/sha2.h>
70
/*
 * Forward declarations of file-local (static) functions.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **,
    const char *, nvlist_t *);
static int guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
    uint64_t num_redact_snaps, char *name);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);
79
/*
 * Argument block consumed by send_progress_thread(): identifies the send
 * to report on and selects how the report is rendered.
 */
typedef struct progress_arg {
	/* dataset handle; its name is printed and used for the query */
	zfs_handle_t *pa_zhp;
	/* file descriptor handed to zfs_send_progress() */
	int pa_fd;
	/* print tab-separated progress lines */
	boolean_t pa_parsable;
	/* header column reads "BYTES" instead of " SENT" */
	boolean_t pa_estimate;
	/* >= 2 adds the visited-blocks column */
	int pa_verbosity;
	/* also publish progress through zfs_setproctitle() */
	boolean_t pa_astitle;
	/* periodic progress lines requested (arms the timer) */
	boolean_t pa_progress;
	/* total size, used to compute the percentage in the title */
	uint64_t pa_size;
} progress_arg_t;
90
91static int
92dump_record(dmu_replay_record_t *drr, void *payload, size_t payload_len,
93    zio_cksum_t *zc, int outfd)
94{
95	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
96	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
97	fletcher_4_incremental_native(drr,
98	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
99	if (drr->drr_type != DRR_BEGIN) {
100		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
101		    drr_checksum.drr_checksum));
102		drr->drr_u.drr_checksum.drr_checksum = *zc;
103	}
104	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
105	    sizeof (zio_cksum_t), zc);
106	if (write(outfd, drr, sizeof (*drr)) == -1)
107		return (errno);
108	if (payload_len != 0) {
109		fletcher_4_incremental_native(payload, payload_len, zc);
110		if (write(outfd, payload, payload_len) == -1)
111			return (errno);
112	}
113	return (0);
114}
115
116/*
117 * Routines for dealing with the AVL tree of fs-nvlists
118 */
/*
 * One entry of the snapshot-guid AVL tree built by fsavl_create().
 */
typedef struct fsavl_node {
	avl_node_t fn_node;		/* AVL linkage */
	nvlist_t *fn_nvfs;		/* nvlist of the containing filesystem */
	const char *fn_snapname;	/* nvpair name of the snapshot entry */
	uint64_t fn_guid;		/* snapshot guid; the sort key */
} fsavl_node_t;
125
126static int
127fsavl_compare(const void *arg1, const void *arg2)
128{
129	const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
130	const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
131
132	return (TREE_CMP(fn1->fn_guid, fn2->fn_guid));
133}
134
135/*
136 * Given the GUID of a snapshot, find its containing filesystem and
137 * (optionally) name.
138 */
139static nvlist_t *
140fsavl_find(avl_tree_t *avl, uint64_t snapguid, const char **snapname)
141{
142	fsavl_node_t fn_find;
143	fsavl_node_t *fn;
144
145	fn_find.fn_guid = snapguid;
146
147	fn = avl_find(avl, &fn_find, NULL);
148	if (fn) {
149		if (snapname)
150			*snapname = fn->fn_snapname;
151		return (fn->fn_nvfs);
152	}
153	return (NULL);
154}
155
156static void
157fsavl_destroy(avl_tree_t *avl)
158{
159	fsavl_node_t *fn;
160	void *cookie;
161
162	if (avl == NULL)
163		return;
164
165	cookie = NULL;
166	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
167		free(fn);
168	avl_destroy(avl);
169	free(avl);
170}
171
172/*
173 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
174 */
175static avl_tree_t *
176fsavl_create(nvlist_t *fss)
177{
178	avl_tree_t *fsavl;
179	nvpair_t *fselem = NULL;
180
181	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
182		return (NULL);
183
184	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
185	    offsetof(fsavl_node_t, fn_node));
186
187	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
188		nvlist_t *nvfs, *snaps;
189		nvpair_t *snapelem = NULL;
190
191		nvfs = fnvpair_value_nvlist(fselem);
192		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
193
194		while ((snapelem =
195		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
196			fsavl_node_t *fn;
197
198			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
199				fsavl_destroy(fsavl);
200				return (NULL);
201			}
202			fn->fn_nvfs = nvfs;
203			fn->fn_snapname = nvpair_name(snapelem);
204			fn->fn_guid = fnvpair_value_uint64(snapelem);
205
206			/*
207			 * Note: if there are multiple snaps with the
208			 * same GUID, we ignore all but one.
209			 */
210			avl_index_t where = 0;
211			if (avl_find(fsavl, fn, &where) == NULL)
212				avl_insert(fsavl, fn, where);
213			else
214				free(fn);
215		}
216	}
217
218	return (fsavl);
219}
220
221/*
222 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
223 */
/*
 * State shared by send_iterate_fs() and send_iterate_snap(), which receive
 * it through their void *arg parameter.  gather_nvlist() fills in the
 * configuration fields and collects the result from fss.
 */
typedef struct send_data {
	/*
	 * assigned inside every recursive call,
	 * restored from *_save on return:
	 *
	 * guid of fromsnap snapshot in parent dataset
	 * txg of fromsnap snapshot in current dataset
	 * txg of tosnap snapshot in current dataset
	 */

	uint64_t parent_fromsnap_guid;
	uint64_t fromsnap_txg;
	uint64_t tosnap_txg;

	/* the nvlists get accumulated during depth-first traversal */
	nvlist_t *parent_snaps;
	nvlist_t *fss;
	nvlist_t *snapprops;
	nvlist_t *snapholds;	/* user holds */

	/* send-receive configuration, does not change during traversal */
	const char *fsname;	/* dataset named in error messages */
	const char *fromsnap;
	const char *tosnap;
	boolean_t recursive;	/* descend into child filesystems */
	boolean_t raw;		/* required when a dataset is encrypted */
	boolean_t doall;
	boolean_t replicate;
	boolean_t skipmissing;	/* warn, not fail, when tosnap is missing */
	boolean_t verbose;	/* print "skipping ..." messages */
	boolean_t backup;	/* collect received properties only */
	boolean_t seenfrom;	/* updated while iterating snapshots */
	boolean_t seento;	/* updated while iterating snapshots */
	boolean_t holds;	/* were holds requested with send -h */
	boolean_t props;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *	 "snapholds" -> { name (lastname) -> { holdname -> crtime } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "is_encroot" -> boolean
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
285
/* Declared ahead of its definition because send_iterate_snap() calls it. */
static void
send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv);
288
289/*
290 * Collect guid, valid props, optionally holds, etc. of a snapshot.
291 * This interface is intended for use as a zfs_iter_snapshots_v2_sorted visitor.
292 */
293static int
294send_iterate_snap(zfs_handle_t *zhp, void *arg)
295{
296	send_data_t *sd = arg;
297	uint64_t guid = zhp->zfs_dmustats.dds_guid;
298	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
299	boolean_t isfromsnap, istosnap, istosnapwithnofrom;
300	char *snapname;
301	const char *from = sd->fromsnap;
302	const char *to = sd->tosnap;
303
304	snapname = strrchr(zhp->zfs_name, '@');
305	assert(snapname != NULL);
306	++snapname;
307
308	isfromsnap = (from != NULL && strcmp(from, snapname) == 0);
309	istosnap = (to != NULL && strcmp(to, snapname) == 0);
310	istosnapwithnofrom = (istosnap && from == NULL);
311
312	if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
313		if (sd->verbose) {
314			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
315			    "skipping snapshot %s because it was created "
316			    "after the destination snapshot (%s)\n"),
317			    zhp->zfs_name, to);
318		}
319		zfs_close(zhp);
320		return (0);
321	}
322
323	fnvlist_add_uint64(sd->parent_snaps, snapname, guid);
324
325	/*
326	 * NB: if there is no fromsnap here (it's a newly created fs in
327	 * an incremental replication), we will substitute the tosnap.
328	 */
329	if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap))
330		sd->parent_fromsnap_guid = guid;
331
332	if (!sd->recursive) {
333		/*
334		 * To allow a doall stream to work properly
335		 * with a NULL fromsnap
336		 */
337		if (sd->doall && from == NULL && !sd->seenfrom)
338			sd->seenfrom = B_TRUE;
339
340		if (!sd->seenfrom && isfromsnap) {
341			sd->seenfrom = B_TRUE;
342			zfs_close(zhp);
343			return (0);
344		}
345
346		if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
347			zfs_close(zhp);
348			return (0);
349		}
350
351		if (istosnap)
352			sd->seento = B_TRUE;
353	}
354
355	nvlist_t *nv = fnvlist_alloc();
356	send_iterate_prop(zhp, sd->backup, nv);
357	fnvlist_add_nvlist(sd->snapprops, snapname, nv);
358	fnvlist_free(nv);
359
360	if (sd->holds) {
361		nvlist_t *holds;
362		if (lzc_get_holds(zhp->zfs_name, &holds) == 0) {
363			fnvlist_add_nvlist(sd->snapholds, snapname, holds);
364			fnvlist_free(holds);
365		}
366	}
367
368	zfs_close(zhp);
369	return (0);
370}
371
372/*
373 * Collect all valid props from the handle snap into an nvlist.
374 */
375static void
376send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
377{
378	nvlist_t *props;
379
380	if (received_only)
381		props = zfs_get_recvd_props(zhp);
382	else
383		props = zhp->zfs_props;
384
385	nvpair_t *elem = NULL;
386	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
387		const char *propname = nvpair_name(elem);
388		zfs_prop_t prop = zfs_name_to_prop(propname);
389
390		if (!zfs_prop_user(propname)) {
391			/*
392			 * Realistically, this should never happen.  However,
393			 * we want the ability to add DSL properties without
394			 * needing to make incompatible version changes.  We
395			 * need to ignore unknown properties to allow older
396			 * software to still send datasets containing these
397			 * properties, with the unknown properties elided.
398			 */
399			if (prop == ZPROP_INVAL)
400				continue;
401
402			if (zfs_prop_readonly(prop))
403				continue;
404		}
405
406		nvlist_t *propnv = fnvpair_value_nvlist(elem);
407
408		boolean_t isspacelimit = (prop == ZFS_PROP_QUOTA ||
409		    prop == ZFS_PROP_RESERVATION ||
410		    prop == ZFS_PROP_REFQUOTA ||
411		    prop == ZFS_PROP_REFRESERVATION);
412		if (isspacelimit && zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
413			continue;
414
415		const char *source;
416		if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) == 0) {
417			if (strcmp(source, zhp->zfs_name) != 0 &&
418			    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
419				continue;
420		} else {
421			/*
422			 * May have no source before SPA_VERSION_RECVD_PROPS,
423			 * but is still modifiable.
424			 */
425			if (!isspacelimit)
426				continue;
427		}
428
429		if (zfs_prop_user(propname) ||
430		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
431			const char *value;
432			value = fnvlist_lookup_string(propnv, ZPROP_VALUE);
433			fnvlist_add_string(nv, propname, value);
434		} else {
435			uint64_t value;
436			value = fnvlist_lookup_uint64(propnv, ZPROP_VALUE);
437			fnvlist_add_uint64(nv, propname, value);
438		}
439	}
440}
441
442/*
443 * returns snapshot guid
444 * and returns 0 if the snapshot does not exist
445 */
446static uint64_t
447get_snap_guid(libzfs_handle_t *hdl, const char *fs, const char *snap)
448{
449	char name[MAXPATHLEN + 1];
450	uint64_t guid = 0;
451
452	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
453		return (guid);
454
455	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
456	zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
457	if (zhp != NULL) {
458		guid = zfs_prop_get_int(zhp, ZFS_PROP_GUID);
459		zfs_close(zhp);
460	}
461
462	return (guid);
463}
464
465/*
466 * returns snapshot creation txg
467 * and returns 0 if the snapshot does not exist
468 */
469static uint64_t
470get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
471{
472	char name[ZFS_MAX_DATASET_NAME_LEN];
473	uint64_t txg = 0;
474
475	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
476		return (txg);
477
478	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
479	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
480		zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
481		if (zhp != NULL) {
482			txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
483			zfs_close(zhp);
484		}
485	}
486
487	return (txg);
488}
489
/*
 * Recursively generate nvlists describing datasets.  See comment
 * for the data structure send_data_t above for description of contents
 * of the nvlist.
 *
 * arg is the shared send_data_t.  When sd->recursive is set this function
 * passes itself to zfs_iter_filesystems_v2() to visit the children.  The
 * handle zhp is always closed before returning.
 *
 * Returns 0 on success (including when the dataset is skipped),
 * EZFS_NOENT when tosnap is missing and may not be skipped, EZFS_NOSPC
 * when the accumulated nvlists would exceed libzfs_max_nvlist, -1 when the
 * origin cannot be opened or an encryption check fails, or the result of
 * the child iteration.
 */
static int
send_iterate_fs(zfs_handle_t *zhp, void *arg)
{
	send_data_t *sd = arg;
	nvlist_t *nvfs = NULL, *nv = NULL;
	int rv = 0;
	uint64_t min_txg = 0, max_txg = 0;
	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
	uint64_t guid = zhp->zfs_dmustats.dds_guid;
	uint64_t fromsnap_txg, tosnap_txg;
	char guidstring[64];

	/* These fields are restored on return from a recursive call. */
	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
	uint64_t fromsnap_txg_save = sd->fromsnap_txg;
	uint64_t tosnap_txg_save = sd->tosnap_txg;

	/*
	 * Only overwrite the txgs in sd when this dataset actually has the
	 * snapshot; otherwise the values set by the caller stay in effect.
	 */
	fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
	if (fromsnap_txg != 0)
		sd->fromsnap_txg = fromsnap_txg;

	tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
	if (tosnap_txg != 0)
		sd->tosnap_txg = tosnap_txg;

	/*
	 * On the send side, if the current dataset does not have tosnap,
	 * perform two additional checks:
	 *
	 * - Skip sending the current dataset if it was created later than
	 *   the parent tosnap.
	 * - Return error if the current dataset was created earlier than
	 *   the parent tosnap, unless --skip-missing specified. Then
	 *   just print a warning.
	 */
	if (sd->tosnap != NULL && tosnap_txg == 0) {
		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
			if (sd->verbose) {
				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
				    "skipping dataset %s: snapshot %s does "
				    "not exist\n"), zhp->zfs_name, sd->tosnap);
			}
		} else if (sd->skipmissing) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "WARNING: skipping dataset %s and its children:"
			    " snapshot %s does not exist\n"),
			    zhp->zfs_name, sd->tosnap);
		} else {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s%s: snapshot %s@%s does not "
			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
			    dgettext(TEXT_DOMAIN, " recursively") : "",
			    zhp->zfs_name, sd->tosnap);
			rv = EZFS_NOENT;
		}
		/* All three cases leave this dataset out; only the last fails. */
		goto out;
	}

	nvfs = fnvlist_alloc();
	fnvlist_add_string(nvfs, "name", zhp->zfs_name);
	fnvlist_add_uint64(nvfs, "parentfromsnap", sd->parent_fromsnap_guid);

	/* A non-empty origin means this dataset is a clone. */
	if (zhp->zfs_dmustats.dds_origin[0] != '\0') {
		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
		if (origin == NULL) {
			rv = -1;
			goto out;
		}
		fnvlist_add_uint64(nvfs, "origin",
		    origin->zfs_dmustats.dds_guid);
		zfs_close(origin);
	}

	/* Iterate over props. */
	if (sd->props || sd->backup || sd->recursive) {
		nv = fnvlist_alloc();
		send_iterate_prop(zhp, sd->backup, nv);
		fnvlist_add_nvlist(nvfs, "props", nv);
	}
	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
		boolean_t encroot;

		/* Determine if this dataset is an encryption root. */
		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
			rv = -1;
			goto out;
		}

		if (encroot)
			fnvlist_add_boolean(nvfs, "is_encroot");

		/*
		 * Encrypted datasets can only be sent with properties if
		 * the raw flag is specified because the receive side doesn't
		 * currently have a mechanism for recursively asking the user
		 * for new encryption parameters.
		 */
		if (!sd->raw) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s: encrypted dataset %s may not "
			    "be sent with properties without the raw flag\n"),
			    sd->fsname, sd->tosnap, zhp->zfs_name);
			rv = -1;
			goto out;
		}

	}

	/*
	 * Iterate over snaps, and set sd->parent_fromsnap_guid.
	 *
	 * If this is a "doall" send, a replicate send or we're just trying
	 * to gather a list of previous snapshots, iterate through all the
	 * snaps in the txg range. Otherwise just look at the one we're
	 * interested in.
	 */
	sd->parent_fromsnap_guid = 0;
	sd->parent_snaps = fnvlist_alloc();
	sd->snapprops = fnvlist_alloc();
	if (sd->holds)
		sd->snapholds = fnvlist_alloc();
	if (sd->doall || sd->replicate || sd->tosnap == NULL) {
		/* A replicate send leaves the txg range unbounded (0, 0). */
		if (!sd->replicate && fromsnap_txg != 0)
			min_txg = fromsnap_txg;
		if (!sd->replicate && tosnap_txg != 0)
			max_txg = tosnap_txg;
		(void) zfs_iter_snapshots_sorted_v2(zhp, 0, send_iterate_snap,
		    sd, min_txg, max_txg);
	} else {
		char snapname[MAXPATHLEN] = { 0 };
		zfs_handle_t *snap;

		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
		    zhp->zfs_name, sd->tosnap);
		if (sd->fromsnap != NULL)
			sd->seenfrom = B_TRUE;
		/* send_iterate_snap() closes the snapshot handle. */
		snap = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
		if (snap != NULL)
			(void) send_iterate_snap(snap, sd);
	}

	/* Attach the per-snapshot lists to this fs, then drop our copies. */
	fnvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps);
	fnvlist_free(sd->parent_snaps);
	fnvlist_add_nvlist(nvfs, "snapprops", sd->snapprops);
	fnvlist_free(sd->snapprops);
	if (sd->holds) {
		fnvlist_add_nvlist(nvfs, "snapholds", sd->snapholds);
		fnvlist_free(sd->snapholds);
	}

	/* Do not allow the size of the properties list to exceed the limit */
	if ((fnvlist_size(nvfs) + fnvlist_size(sd->fss)) >
	    zhp->zfs_hdl->libzfs_max_nvlist) {
		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
		    "warning: cannot send %s@%s: the size of the list of "
		    "snapshots and properties is too large to be received "
		    "successfully.\n"
		    "Select a smaller number of snapshots to send.\n"),
		    zhp->zfs_name, sd->tosnap);
		rv = EZFS_NOSPC;
		goto out;
	}
	/* Add this fs to nvlist. */
	(void) snprintf(guidstring, sizeof (guidstring),
	    "0x%llx", (longlong_t)guid);
	fnvlist_add_nvlist(sd->fss, guidstring, nvfs);

	/* Iterate over children. */
	if (sd->recursive)
		rv = zfs_iter_filesystems_v2(zhp, 0, send_iterate_fs, sd);

out:
	/* Restore saved fields. */
	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
	sd->fromsnap_txg = fromsnap_txg_save;
	sd->tosnap_txg = tosnap_txg_save;

	/* nv and nvfs are still NULL if we bailed out before allocating. */
	fnvlist_free(nv);
	fnvlist_free(nvfs);

	zfs_close(zhp);
	return (rv);
}
679
680static int
681gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
682    const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t doall,
683    boolean_t replicate, boolean_t skipmissing, boolean_t verbose,
684    boolean_t backup, boolean_t holds, boolean_t props, nvlist_t **nvlp,
685    avl_tree_t **avlp)
686{
687	zfs_handle_t *zhp;
688	send_data_t sd = { 0 };
689	int error;
690
691	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
692	if (zhp == NULL)
693		return (EZFS_BADTYPE);
694
695	sd.fss = fnvlist_alloc();
696	sd.fsname = fsname;
697	sd.fromsnap = fromsnap;
698	sd.tosnap = tosnap;
699	sd.recursive = recursive;
700	sd.raw = raw;
701	sd.doall = doall;
702	sd.replicate = replicate;
703	sd.skipmissing = skipmissing;
704	sd.verbose = verbose;
705	sd.backup = backup;
706	sd.holds = holds;
707	sd.props = props;
708
709	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
710		fnvlist_free(sd.fss);
711		if (avlp != NULL)
712			*avlp = NULL;
713		*nvlp = NULL;
714		return (error);
715	}
716
717	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
718		fnvlist_free(sd.fss);
719		*nvlp = NULL;
720		return (EZFS_NOMEM);
721	}
722
723	*nvlp = sd.fss;
724	return (0);
725}
726
727/*
728 * Routines specific to "zfs send"
729 */
/*
 * State for the "zfs send" dump routines.
 *
 * NOTE(review): in this part of the file only snapholds and holdtag are
 * consumed (by gather_holds()); the other fields are presumably used by
 * code further down -- confirm there before relying on their meaning.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t dryrun, parsable, progress, embed_data, std_out;
	boolean_t large_block, compress, raw, holds;
	boolean_t progressastitle;
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/* snapshot name -> hold tag; NULL when no holds are gathered */
	nvlist_t *snapholds;
	avl_tree_t *fsavl;
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	nvlist_t *debugnv;
	/* tag recorded for every snapshot added to snapholds */
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
	int cleanup_fd;
	int verbosity;
	uint64_t size;
} send_dump_data_t;
753
754static int
755zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
756    enum lzc_send_flags flags, uint64_t *spacep)
757{
758	assert(snapname != NULL);
759
760	int error = lzc_send_space(snapname, from, flags, spacep);
761	if (error == 0)
762		return (0);
763
764	char errbuf[ERRBUFLEN];
765	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
766	    "warning: cannot estimate space for '%s'"), snapname);
767
768	libzfs_handle_t *hdl = zhp->zfs_hdl;
769	switch (error) {
770	case EXDEV:
771		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
772		    "not an earlier snapshot from the same fs"));
773		return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
774
775	case ENOENT:
776		if (zfs_dataset_exists(hdl, snapname,
777		    ZFS_TYPE_SNAPSHOT)) {
778			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
779			    "incremental source (%s) does not exist"),
780			    snapname);
781		}
782		return (zfs_error(hdl, EZFS_NOENT, errbuf));
783
784	case EDQUOT:
785	case EFBIG:
786	case EIO:
787	case ENOLINK:
788	case ENOSPC:
789	case ENOSTR:
790	case ENXIO:
791	case EPIPE:
792	case ERANGE:
793	case EFAULT:
794	case EROFS:
795	case EINVAL:
796		zfs_error_aux(hdl, "%s", zfs_strerror(error));
797		return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
798
799	default:
800		return (zfs_standard_error(hdl, error, errbuf));
801	}
802}
803
804/*
805 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
806 * NULL) to the file descriptor specified by outfd.
807 */
808static int
809dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
810    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
811    nvlist_t *debugnv)
812{
813	zfs_cmd_t zc = {"\0"};
814	libzfs_handle_t *hdl = zhp->zfs_hdl;
815	nvlist_t *thisdbg;
816
817	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
818	assert(fromsnap_obj == 0 || !fromorigin);
819
820	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
821	zc.zc_cookie = outfd;
822	zc.zc_obj = fromorigin;
823	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
824	zc.zc_fromobj = fromsnap_obj;
825	zc.zc_flags = flags;
826
827	if (debugnv != NULL) {
828		thisdbg = fnvlist_alloc();
829		if (fromsnap != NULL && fromsnap[0] != '\0')
830			fnvlist_add_string(thisdbg, "fromsnap", fromsnap);
831	}
832
833	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
834		char errbuf[ERRBUFLEN];
835		int error = errno;
836
837		(void) snprintf(errbuf, sizeof (errbuf), "%s '%s'",
838		    dgettext(TEXT_DOMAIN, "warning: cannot send"),
839		    zhp->zfs_name);
840
841		if (debugnv != NULL) {
842			fnvlist_add_uint64(thisdbg, "error", error);
843			fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
844			fnvlist_free(thisdbg);
845		}
846
847		switch (error) {
848		case EXDEV:
849			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
850			    "not an earlier snapshot from the same fs"));
851			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
852
853		case EACCES:
854			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
855			    "source key must be loaded"));
856			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
857
858		case ENOENT:
859			if (zfs_dataset_exists(hdl, zc.zc_name,
860			    ZFS_TYPE_SNAPSHOT)) {
861				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
862				    "incremental source (@%s) does not exist"),
863				    zc.zc_value);
864			}
865			return (zfs_error(hdl, EZFS_NOENT, errbuf));
866
867		case EDQUOT:
868		case EFBIG:
869		case EIO:
870		case ENOLINK:
871		case ENOSPC:
872		case ENOSTR:
873		case ENXIO:
874		case EPIPE:
875		case ERANGE:
876		case EFAULT:
877		case EROFS:
878		case EINVAL:
879			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
880			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
881
882		default:
883			return (zfs_standard_error(hdl, errno, errbuf));
884		}
885	}
886
887	if (debugnv != NULL) {
888		fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
889		fnvlist_free(thisdbg);
890	}
891
892	return (0);
893}
894
895static void
896gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
897{
898	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
899
900	/*
901	 * zfs_send() only sets snapholds for sends that need them,
902	 * e.g. replication and doall.
903	 */
904	if (sdd->snapholds == NULL)
905		return;
906
907	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
908}
909
910int
911zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
912    uint64_t *blocks_visited)
913{
914	zfs_cmd_t zc = {"\0"};
915
916	if (bytes_written != NULL)
917		*bytes_written = 0;
918	if (blocks_visited != NULL)
919		*blocks_visited = 0;
920	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
921	zc.zc_cookie = fd;
922	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
923		return (errno);
924	if (bytes_written != NULL)
925		*bytes_written = zc.zc_cookie;
926	if (blocks_visited != NULL)
927		*blocks_visited = zc.zc_objset_type;
928	return (0);
929}
930
931static volatile boolean_t send_progress_thread_signal_duetotimer;
932static void
933send_progress_thread_act(int sig, siginfo_t *info, void *ucontext)
934{
935	(void) sig, (void) ucontext;
936	send_progress_thread_signal_duetotimer = info->si_code == SI_TIMER;
937}
938
/*
 * A timer together with a flag telling timer_delete_cleanup() whether the
 * timer should be deleted.
 */
struct timer_desirability {
	timer_t timer;		/* meaningful only when desired is set */
	boolean_t desired;	/* a timer was requested */
};
943static void
944timer_delete_cleanup(void *timer)
945{
946	struct timer_desirability *td = timer;
947	if (td->desired)
948		timer_delete(td->timer);
949}
950
/*
 * SEND_PROGRESS_THREAD_PARENT_BLOCK(old) blocks SIGUSR1 (and SIGINFO, on
 * platforms that define it) in the calling thread and stores the previous
 * signal mask through the sigset_t pointer `old` -- presumably the mask
 * that is later handed to send_progress_thread_exit() to be restored.
 *
 * The macro expands to a braced block that declares a local named `new`.
 */
#ifdef SIGINFO
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO sigaddset(&new, SIGINFO)
#else
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO
#endif
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK(old) { \
	sigset_t new; \
	sigemptyset(&new); \
	sigaddset(&new, SIGUSR1); \
	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO; \
	pthread_sigmask(SIG_BLOCK, &new, old); \
}
963
/*
 * Thread body that reports the progress of the send described by the
 * progress_arg_t passed in arg.
 *
 * Installs send_progress_thread_act() for SIGUSR1 (and SIGINFO where it is
 * defined).  When periodic output or title updates were requested it also
 * arms a timer that raises SIGUSR1 once per second.  It then loops:
 * pause() until a signal is delivered, query zfs_send_progress(), update
 * the process title and/or print one line to stderr as selected by the
 * pa_* flags.
 *
 * The loop has no normal exit.  The thread ends through pthread_exit()
 * when the progress query fails (EINTR and ENOENT are reported as 0), by
 * an early return if the timer cannot be created, or by being cancelled
 * (see send_progress_thread_exit()).  The exit value is an errno-style
 * code cast to a pointer.
 */
static void *
send_progress_thread(void *arg)
{
	progress_arg_t *pa = arg;
	zfs_handle_t *zhp = pa->pa_zhp;
	uint64_t bytes;
	uint64_t blocks;
	/* One percent of the total, so that bytes / total is a percentage. */
	uint64_t total = pa->pa_size / 100;
	char buf[16];
	time_t t;
	struct tm tm;
	int err;

	const struct sigaction signal_action =
	    {.sa_sigaction = send_progress_thread_act, .sa_flags = SA_SIGINFO};
	struct sigevent timer_cfg =
	    {.sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGUSR1};
	const struct itimerspec timer_time =
	    {.it_value = {.tv_sec = 1}, .it_interval = {.tv_sec = 1}};
	struct timer_desirability timer = {};

	sigaction(SIGUSR1, &signal_action, NULL);
#ifdef SIGINFO
	sigaction(SIGINFO, &signal_action, NULL);
#endif

	/*
	 * The timer is only needed for periodic output; without it the
	 * thread wakes up only when a signal is sent to it.
	 */
	if ((timer.desired = pa->pa_progress || pa->pa_astitle)) {
		/* No cleanup handler is pushed yet, so a plain return is fine. */
		if (timer_create(CLOCK_MONOTONIC, &timer_cfg, &timer.timer))
			return ((void *)(uintptr_t)errno);
		(void) timer_settime(timer.timer, 0, &timer_time, NULL);
	}
	/* From here on the timer is deleted on pthread_exit() or cancel. */
	pthread_cleanup_push(timer_delete_cleanup, &timer);

	if (!pa->pa_parsable && pa->pa_progress) {
		(void) fprintf(stderr,
		    "TIME       %s   %sSNAPSHOT %s\n",
		    pa->pa_estimate ? "BYTES" : " SENT",
		    pa->pa_verbosity >= 2 ? "   BLOCKS    " : "",
		    zhp->zfs_name);
	}

	/*
	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
	 */
	for (;;) {
		pause();
		if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
		    &blocks)) != 0) {
			/* These two errors end the thread without an error. */
			if (err == EINTR || err == ENOENT)
				err = 0;
			pthread_exit(((void *)(uintptr_t)err));
		}

		(void) time(&t);
		localtime_r(&t, &tm);

		if (pa->pa_astitle) {
			char buf_bytes[16];
			char buf_size[16];
			int pct;
			zfs_nicenum(bytes, buf_bytes, sizeof (buf_bytes));
			zfs_nicenum(pa->pa_size, buf_size, sizeof (buf_size));
			/* total is 0 for sizes below 100; report 100% then. */
			pct = (total > 0) ? bytes / total : 100;
			zfs_setproctitle("sending %s (%d%%: %s/%s)",
			    zhp->zfs_name, MIN(pct, 100), buf_bytes, buf_size);
		}

		if (pa->pa_verbosity >= 2 && pa->pa_parsable) {
			(void) fprintf(stderr,
			    "%02d:%02d:%02d\t%llu\t%llu\t%s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    (u_longlong_t)bytes, (u_longlong_t)blocks,
			    zhp->zfs_name);
		} else if (pa->pa_verbosity >= 2) {
			zfs_nicenum(bytes, buf, sizeof (buf));
			(void) fprintf(stderr,
			    "%02d:%02d:%02d   %5s    %8llu    %s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    buf, (u_longlong_t)blocks, zhp->zfs_name);
		} else if (pa->pa_parsable) {
			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    (u_longlong_t)bytes, zhp->zfs_name);
		} else if (pa->pa_progress ||
		    !send_progress_thread_signal_duetotimer) {
			/*
			 * Without pa_progress, a line is printed only for
			 * wakeups that did not come from the timer.
			 */
			zfs_nicebytes(bytes, buf, sizeof (buf));
			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    buf, zhp->zfs_name);
		}
	}
	/* Not reached; pairs lexically with pthread_cleanup_push() above. */
	pthread_cleanup_pop(B_TRUE);
	return (NULL);
}
1058
1059static boolean_t
1060send_progress_thread_exit(
1061    libzfs_handle_t *hdl, pthread_t ptid, sigset_t *oldmask)
1062{
1063	void *status = NULL;
1064	(void) pthread_cancel(ptid);
1065	(void) pthread_join(ptid, &status);
1066	pthread_sigmask(SIG_SETMASK, oldmask, NULL);
1067	int error = (int)(uintptr_t)status;
1068	if (error != 0 && status != PTHREAD_CANCELED)
1069		return (zfs_standard_error(hdl, error,
1070		    dgettext(TEXT_DOMAIN, "progress thread exited nonzero")));
1071	else
1072		return (B_FALSE);
1073}
1074
1075static void
1076send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1077    uint64_t size, boolean_t parsable)
1078{
1079	if (parsable) {
1080		if (fromsnap != NULL) {
1081			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1082			    "incremental\t%s\t%s"), fromsnap, tosnap);
1083		} else {
1084/*
1085 * Workaround for GCC 12+ with UBSan enabled deficencies.
1086 *
1087 * GCC 12+ invoked with -fsanitize=undefined incorrectly reports the code
1088 * below as violating -Wformat-overflow.
1089 */
1090#if defined(__GNUC__) && !defined(__clang__) && \
1091	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1092#pragma GCC diagnostic push
1093#pragma GCC diagnostic ignored "-Wformat-overflow"
1094#endif
1095			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1096			    "full\t%s"), tosnap);
1097#if defined(__GNUC__) && !defined(__clang__) && \
1098	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1099#pragma GCC diagnostic pop
1100#endif
1101		}
1102		(void) fprintf(fout, "\t%llu", (longlong_t)size);
1103	} else {
1104		if (fromsnap != NULL) {
1105			if (strchr(fromsnap, '@') == NULL &&
1106			    strchr(fromsnap, '#') == NULL) {
1107				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1108				    "send from @%s to %s"), fromsnap, tosnap);
1109			} else {
1110				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1111				    "send from %s to %s"), fromsnap, tosnap);
1112			}
1113		} else {
1114			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1115			    "full send of %s"), tosnap);
1116		}
1117		if (size != 0) {
1118			char buf[16];
1119			zfs_nicebytes(size, buf, sizeof (buf));
1120/*
1121 * Workaround for GCC 12+ with UBSan enabled deficencies.
1122 *
1123 * GCC 12+ invoked with -fsanitize=undefined incorrectly reports the code
1124 * below as violating -Wformat-overflow.
1125 */
1126#if defined(__GNUC__) && !defined(__clang__) && \
1127	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1128#pragma GCC diagnostic push
1129#pragma GCC diagnostic ignored "-Wformat-overflow"
1130#endif
1131			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1132			    " estimated size is %s"), buf);
1133#if defined(__GNUC__) && !defined(__clang__) && \
1134	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1135#pragma GCC diagnostic pop
1136#endif
1137		}
1138	}
1139	(void) fprintf(fout, "\n");
1140}
1141
1142/*
1143 * Send a single filesystem snapshot, updating the send dump data.
1144 * This interface is intended for use as a zfs_iter_snapshots_v2_sorted visitor.
1145 */
1146static int
1147dump_snapshot(zfs_handle_t *zhp, void *arg)
1148{
1149	send_dump_data_t *sdd = arg;
1150	progress_arg_t pa = { 0 };
1151	pthread_t tid;
1152	char *thissnap;
1153	enum lzc_send_flags flags = 0;
1154	int err;
1155	boolean_t isfromsnap, istosnap, fromorigin;
1156	boolean_t exclude = B_FALSE;
1157	FILE *fout = sdd->std_out ? stdout : stderr;
1158
1159	err = 0;
1160	thissnap = strchr(zhp->zfs_name, '@') + 1;
1161	isfromsnap = (sdd->fromsnap != NULL &&
1162	    strcmp(sdd->fromsnap, thissnap) == 0);
1163
1164	if (!sdd->seenfrom && isfromsnap) {
1165		gather_holds(zhp, sdd);
1166		sdd->seenfrom = B_TRUE;
1167		(void) strlcpy(sdd->prevsnap, thissnap, sizeof (sdd->prevsnap));
1168		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1169		zfs_close(zhp);
1170		return (0);
1171	}
1172
1173	if (sdd->seento || !sdd->seenfrom) {
1174		zfs_close(zhp);
1175		return (0);
1176	}
1177
1178	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1179	if (istosnap)
1180		sdd->seento = B_TRUE;
1181
1182	if (sdd->large_block)
1183		flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1184	if (sdd->embed_data)
1185		flags |= LZC_SEND_FLAG_EMBED_DATA;
1186	if (sdd->compress)
1187		flags |= LZC_SEND_FLAG_COMPRESS;
1188	if (sdd->raw)
1189		flags |= LZC_SEND_FLAG_RAW;
1190
1191	if (!sdd->doall && !isfromsnap && !istosnap) {
1192		if (sdd->replicate) {
1193			const char *snapname;
1194			nvlist_t *snapprops;
1195			/*
1196			 * Filter out all intermediate snapshots except origin
1197			 * snapshots needed to replicate clones.
1198			 */
1199			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1200			    zhp->zfs_dmustats.dds_guid, &snapname);
1201
1202			if (nvfs != NULL) {
1203				snapprops = fnvlist_lookup_nvlist(nvfs,
1204				    "snapprops");
1205				snapprops = fnvlist_lookup_nvlist(snapprops,
1206				    thissnap);
1207				exclude = !nvlist_exists(snapprops,
1208				    "is_clone_origin");
1209			}
1210		} else {
1211			exclude = B_TRUE;
1212		}
1213	}
1214
1215	/*
1216	 * If a filter function exists, call it to determine whether
1217	 * this snapshot will be sent.
1218	 */
1219	if (exclude || (sdd->filter_cb != NULL &&
1220	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1221		/*
1222		 * This snapshot is filtered out.  Don't send it, and don't
1223		 * set prevsnap_obj, so it will be as if this snapshot didn't
1224		 * exist, and the next accepted snapshot will be sent as
1225		 * an incremental from the last accepted one, or as the
1226		 * first (and full) snapshot in the case of a replication,
1227		 * non-incremental send.
1228		 */
1229		zfs_close(zhp);
1230		return (0);
1231	}
1232
1233	gather_holds(zhp, sdd);
1234	fromorigin = sdd->prevsnap[0] == '\0' &&
1235	    (sdd->fromorigin || sdd->replicate);
1236
1237	if (sdd->verbosity != 0) {
1238		uint64_t size = 0;
1239		char fromds[ZFS_MAX_DATASET_NAME_LEN];
1240
1241		if (sdd->prevsnap[0] != '\0') {
1242			(void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds));
1243			*(strchr(fromds, '@') + 1) = '\0';
1244			(void) strlcat(fromds, sdd->prevsnap, sizeof (fromds));
1245		}
1246		if (zfs_send_space(zhp, zhp->zfs_name,
1247		    sdd->prevsnap[0] ? fromds : NULL, flags, &size) == 0) {
1248			send_print_verbose(fout, zhp->zfs_name,
1249			    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1250			    size, sdd->parsable);
1251			sdd->size += size;
1252		}
1253	}
1254
1255	if (!sdd->dryrun) {
1256		/*
1257		 * If progress reporting is requested, spawn a new thread to
1258		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1259		 */
1260		sigset_t oldmask;
1261		{
1262			pa.pa_zhp = zhp;
1263			pa.pa_fd = sdd->outfd;
1264			pa.pa_parsable = sdd->parsable;
1265			pa.pa_estimate = B_FALSE;
1266			pa.pa_verbosity = sdd->verbosity;
1267			pa.pa_size = sdd->size;
1268			pa.pa_astitle = sdd->progressastitle;
1269			pa.pa_progress = sdd->progress;
1270
1271			if ((err = pthread_create(&tid, NULL,
1272			    send_progress_thread, &pa)) != 0) {
1273				zfs_close(zhp);
1274				return (err);
1275			}
1276			SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1277		}
1278
1279		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1280		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1281
1282		if (send_progress_thread_exit(zhp->zfs_hdl, tid, &oldmask))
1283			return (-1);
1284	}
1285
1286	(void) strlcpy(sdd->prevsnap, thissnap, sizeof (sdd->prevsnap));
1287	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1288	zfs_close(zhp);
1289	return (err);
1290}
1291
1292/*
1293 * Send all snapshots for a filesystem, updating the send dump data.
1294 */
1295static int
1296dump_filesystem(zfs_handle_t *zhp, send_dump_data_t *sdd)
1297{
1298	int rv = 0;
1299	boolean_t missingfrom = B_FALSE;
1300	zfs_cmd_t zc = {"\0"};
1301	uint64_t min_txg = 0, max_txg = 0;
1302
1303	/*
1304	 * Make sure the tosnap exists.
1305	 */
1306	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1307	    zhp->zfs_name, sdd->tosnap);
1308	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1309		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1310		    "WARNING: could not send %s@%s: does not exist\n"),
1311		    zhp->zfs_name, sdd->tosnap);
1312		sdd->err = B_TRUE;
1313		return (0);
1314	}
1315
1316	/*
1317	 * If this fs does not have fromsnap, and we're doing
1318	 * recursive, we need to send a full stream from the
1319	 * beginning (or an incremental from the origin if this
1320	 * is a clone).  If we're doing non-recursive, then let
1321	 * them get the error.
1322	 */
1323	if (sdd->replicate && sdd->fromsnap) {
1324		/*
1325		 * Make sure the fromsnap exists.
1326		 */
1327		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1328		    zhp->zfs_name, sdd->fromsnap);
1329		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0)
1330			missingfrom = B_TRUE;
1331	}
1332
1333	sdd->seenfrom = sdd->seento = B_FALSE;
1334	sdd->prevsnap[0] = '\0';
1335	sdd->prevsnap_obj = 0;
1336	if (sdd->fromsnap == NULL || missingfrom)
1337		sdd->seenfrom = B_TRUE;
1338
1339	/*
1340	 * Iterate through all snapshots and process the ones we will be
1341	 * sending. If we only have a "from" and "to" snapshot to deal
1342	 * with, we can avoid iterating through all the other snapshots.
1343	 */
1344	if (sdd->doall || sdd->replicate || sdd->tosnap == NULL) {
1345		if (!sdd->replicate) {
1346			if (sdd->fromsnap != NULL) {
1347				min_txg = get_snap_txg(zhp->zfs_hdl,
1348				    zhp->zfs_name, sdd->fromsnap);
1349			}
1350			if (sdd->tosnap != NULL) {
1351				max_txg = get_snap_txg(zhp->zfs_hdl,
1352				    zhp->zfs_name, sdd->tosnap);
1353			}
1354		}
1355		rv = zfs_iter_snapshots_sorted_v2(zhp, 0, dump_snapshot, sdd,
1356		    min_txg, max_txg);
1357	} else {
1358		char snapname[MAXPATHLEN] = { 0 };
1359		zfs_handle_t *snap;
1360
1361		/* Dump fromsnap. */
1362		if (!sdd->seenfrom) {
1363			(void) snprintf(snapname, sizeof (snapname),
1364			    "%s@%s", zhp->zfs_name, sdd->fromsnap);
1365			snap = zfs_open(zhp->zfs_hdl, snapname,
1366			    ZFS_TYPE_SNAPSHOT);
1367			if (snap != NULL)
1368				rv = dump_snapshot(snap, sdd);
1369			else
1370				rv = errno;
1371		}
1372
1373		/* Dump tosnap. */
1374		if (rv == 0) {
1375			(void) snprintf(snapname, sizeof (snapname),
1376			    "%s@%s", zhp->zfs_name, sdd->tosnap);
1377			snap = zfs_open(zhp->zfs_hdl, snapname,
1378			    ZFS_TYPE_SNAPSHOT);
1379			if (snap != NULL)
1380				rv = dump_snapshot(snap, sdd);
1381			else
1382				rv = errno;
1383		}
1384	}
1385
1386	if (!sdd->seenfrom) {
1387		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1388		    "WARNING: could not send %s@%s:\n"
1389		    "incremental source (%s@%s) does not exist\n"),
1390		    zhp->zfs_name, sdd->tosnap,
1391		    zhp->zfs_name, sdd->fromsnap);
1392		sdd->err = B_TRUE;
1393	} else if (!sdd->seento) {
1394		if (sdd->fromsnap) {
1395			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1396			    "WARNING: could not send %s@%s:\n"
1397			    "incremental source (%s@%s) "
1398			    "is not earlier than it\n"),
1399			    zhp->zfs_name, sdd->tosnap,
1400			    zhp->zfs_name, sdd->fromsnap);
1401		} else {
1402			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1403			    "WARNING: "
1404			    "could not send %s@%s: does not exist\n"),
1405			    zhp->zfs_name, sdd->tosnap);
1406		}
1407		sdd->err = B_TRUE;
1408	}
1409
1410	return (rv);
1411}
1412
1413/*
1414 * Send all snapshots for all filesystems in sdd.
1415 */
1416static int
1417dump_filesystems(zfs_handle_t *rzhp, send_dump_data_t *sdd)
1418{
1419	nvpair_t *fspair;
1420	boolean_t needagain, progress;
1421
1422	if (!sdd->replicate)
1423		return (dump_filesystem(rzhp, sdd));
1424
1425	/* Mark the clone origin snapshots. */
1426	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1427	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1428		nvlist_t *nvfs;
1429		uint64_t origin_guid = 0;
1430
1431		nvfs = fnvpair_value_nvlist(fspair);
1432		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1433		if (origin_guid != 0) {
1434			const char *snapname;
1435			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1436			    origin_guid, &snapname);
1437			if (origin_nv != NULL) {
1438				nvlist_t *snapprops;
1439				snapprops = fnvlist_lookup_nvlist(origin_nv,
1440				    "snapprops");
1441				snapprops = fnvlist_lookup_nvlist(snapprops,
1442				    snapname);
1443				fnvlist_add_boolean(snapprops,
1444				    "is_clone_origin");
1445			}
1446		}
1447	}
1448again:
1449	needagain = progress = B_FALSE;
1450	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1451	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1452		nvlist_t *fslist, *parent_nv;
1453		const char *fsname;
1454		zfs_handle_t *zhp;
1455		int err;
1456		uint64_t origin_guid = 0;
1457		uint64_t parent_guid = 0;
1458
1459		fslist = fnvpair_value_nvlist(fspair);
1460		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1461			continue;
1462
1463		fsname = fnvlist_lookup_string(fslist, "name");
1464		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1465		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1466		    &parent_guid);
1467
1468		if (parent_guid != 0) {
1469			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1470			if (!nvlist_exists(parent_nv, "sent")) {
1471				/* Parent has not been sent; skip this one. */
1472				needagain = B_TRUE;
1473				continue;
1474			}
1475		}
1476
1477		if (origin_guid != 0) {
1478			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1479			    origin_guid, NULL);
1480			if (origin_nv != NULL &&
1481			    !nvlist_exists(origin_nv, "sent")) {
1482				/*
1483				 * Origin has not been sent yet;
1484				 * skip this clone.
1485				 */
1486				needagain = B_TRUE;
1487				continue;
1488			}
1489		}
1490
1491		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1492		if (zhp == NULL)
1493			return (-1);
1494		err = dump_filesystem(zhp, sdd);
1495		fnvlist_add_boolean(fslist, "sent");
1496		progress = B_TRUE;
1497		zfs_close(zhp);
1498		if (err)
1499			return (err);
1500	}
1501	if (needagain) {
1502		assert(progress);
1503		goto again;
1504	}
1505
1506	/* Clean out the sent flags in case we reuse this fss. */
1507	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1508	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1509		nvlist_t *fslist;
1510
1511		fslist = fnvpair_value_nvlist(fspair);
1512		(void) nvlist_remove_all(fslist, "sent");
1513	}
1514
1515	return (0);
1516}
1517
1518nvlist_t *
1519zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1520{
1521	unsigned int version;
1522	int nread, i;
1523	unsigned long long checksum, packed_len;
1524
1525	/*
1526	 * Decode token header, which is:
1527	 *   <token version>-<checksum of payload>-<uncompressed payload length>
1528	 * Note that the only supported token version is 1.
1529	 */
1530	nread = sscanf(token, "%u-%llx-%llx-",
1531	    &version, &checksum, &packed_len);
1532	if (nread != 3) {
1533		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1534		    "resume token is corrupt (invalid format)"));
1535		return (NULL);
1536	}
1537
1538	if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1539		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1540		    "resume token is corrupt (invalid version %u)"),
1541		    version);
1542		return (NULL);
1543	}
1544
1545	/* Convert hexadecimal representation to binary. */
1546	token = strrchr(token, '-') + 1;
1547	int len = strlen(token) / 2;
1548	unsigned char *compressed = zfs_alloc(hdl, len);
1549	for (i = 0; i < len; i++) {
1550		nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1551		if (nread != 1) {
1552			free(compressed);
1553			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1554			    "resume token is corrupt "
1555			    "(payload is not hex-encoded)"));
1556			return (NULL);
1557		}
1558	}
1559
1560	/* Verify checksum. */
1561	zio_cksum_t cksum;
1562	fletcher_4_native_varsize(compressed, len, &cksum);
1563	if (cksum.zc_word[0] != checksum) {
1564		free(compressed);
1565		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1566		    "resume token is corrupt (incorrect checksum)"));
1567		return (NULL);
1568	}
1569
1570	/* Uncompress. */
1571	void *packed = zfs_alloc(hdl, packed_len);
1572	uLongf packed_len_long = packed_len;
1573	if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1574	    packed_len_long != packed_len) {
1575		free(packed);
1576		free(compressed);
1577		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1578		    "resume token is corrupt (decompression failed)"));
1579		return (NULL);
1580	}
1581
1582	/* Unpack nvlist. */
1583	nvlist_t *nv;
1584	int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1585	free(packed);
1586	free(compressed);
1587	if (error != 0) {
1588		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1589		    "resume token is corrupt (nvlist_unpack failed)"));
1590		return (NULL);
1591	}
1592	return (nv);
1593}
1594
1595static enum lzc_send_flags
1596lzc_flags_from_sendflags(const sendflags_t *flags)
1597{
1598	enum lzc_send_flags lzc_flags = 0;
1599
1600	if (flags->largeblock)
1601		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1602	if (flags->embed_data)
1603		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1604	if (flags->compress)
1605		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1606	if (flags->raw)
1607		lzc_flags |= LZC_SEND_FLAG_RAW;
1608	if (flags->saved)
1609		lzc_flags |= LZC_SEND_FLAG_SAVED;
1610
1611	return (lzc_flags);
1612}
1613
1614static int
1615estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
1616    uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes,
1617    const char *redactbook, char *errbuf, uint64_t *sizep)
1618{
1619	uint64_t size;
1620	FILE *fout = flags->dryrun ? stdout : stderr;
1621	progress_arg_t pa = { 0 };
1622	int err = 0;
1623	pthread_t ptid;
1624	sigset_t oldmask;
1625
1626	{
1627		pa.pa_zhp = zhp;
1628		pa.pa_fd = fd;
1629		pa.pa_parsable = flags->parsable;
1630		pa.pa_estimate = B_TRUE;
1631		pa.pa_verbosity = flags->verbosity;
1632
1633		err = pthread_create(&ptid, NULL,
1634		    send_progress_thread, &pa);
1635		if (err != 0) {
1636			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(errno));
1637			return (zfs_error(zhp->zfs_hdl,
1638			    EZFS_THREADCREATEFAILED, errbuf));
1639		}
1640		SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1641	}
1642
1643	err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
1644	    lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes,
1645	    redactbook, fd, &size);
1646	*sizep = size;
1647
1648	if (send_progress_thread_exit(zhp->zfs_hdl, ptid, &oldmask))
1649		return (-1);
1650
1651	if (!flags->progress && !flags->parsable)
1652		return (err);
1653
1654	if (err != 0) {
1655		zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
1656		return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
1657		    errbuf));
1658	}
1659	send_print_verbose(fout, zhp->zfs_name, from, size,
1660	    flags->parsable);
1661
1662	if (flags->parsable) {
1663		(void) fprintf(fout, "size\t%llu\n", (longlong_t)size);
1664	} else {
1665		char buf[16];
1666		zfs_nicenum(size, buf, sizeof (buf));
1667		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1668		    "total estimated size is %s\n"), buf);
1669	}
1670	return (0);
1671}
1672
1673static boolean_t
1674redact_snaps_contains(const uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
1675{
1676	for (int i = 0; i < num_snaps; i++) {
1677		if (snaps[i] == guid)
1678			return (B_TRUE);
1679	}
1680	return (B_FALSE);
1681}
1682
1683static boolean_t
1684redact_snaps_equal(const uint64_t *snaps1, uint64_t num_snaps1,
1685    const uint64_t *snaps2, uint64_t num_snaps2)
1686{
1687	if (num_snaps1 != num_snaps2)
1688		return (B_FALSE);
1689	for (int i = 0; i < num_snaps1; i++) {
1690		if (!redact_snaps_contains(snaps2, num_snaps2, snaps1[i]))
1691			return (B_FALSE);
1692	}
1693	return (B_TRUE);
1694}
1695
1696static int
1697get_bookmarks(const char *path, nvlist_t **bmarksp)
1698{
1699	nvlist_t *props = fnvlist_alloc();
1700	int error;
1701
1702	fnvlist_add_boolean(props, "redact_complete");
1703	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1704	error = lzc_get_bookmarks(path, props, bmarksp);
1705	fnvlist_free(props);
1706	return (error);
1707}
1708
1709static nvpair_t *
1710find_redact_pair(nvlist_t *bmarks, const uint64_t *redact_snap_guids,
1711    int num_redact_snaps)
1712{
1713	nvpair_t *pair;
1714
1715	for (pair = nvlist_next_nvpair(bmarks, NULL); pair;
1716	    pair = nvlist_next_nvpair(bmarks, pair)) {
1717
1718		nvlist_t *bmark = fnvpair_value_nvlist(pair);
1719		nvlist_t *vallist = fnvlist_lookup_nvlist(bmark,
1720		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1721		uint_t len = 0;
1722		uint64_t *bmarksnaps = fnvlist_lookup_uint64_array(vallist,
1723		    ZPROP_VALUE, &len);
1724		if (redact_snaps_equal(redact_snap_guids,
1725		    num_redact_snaps, bmarksnaps, len)) {
1726			break;
1727		}
1728	}
1729	return (pair);
1730}
1731
1732static boolean_t
1733get_redact_complete(nvpair_t *pair)
1734{
1735	nvlist_t *bmark = fnvpair_value_nvlist(pair);
1736	nvlist_t *vallist = fnvlist_lookup_nvlist(bmark, "redact_complete");
1737	boolean_t complete = fnvlist_lookup_boolean_value(vallist,
1738	    ZPROP_VALUE);
1739
1740	return (complete);
1741}
1742
1743/*
1744 * Check that the list of redaction snapshots in the bookmark matches the send
1745 * we're resuming, and return whether or not it's complete.
1746 *
1747 * Note that the caller needs to free the contents of *bookname with free() if
1748 * this function returns successfully.
1749 */
1750static int
1751find_redact_book(libzfs_handle_t *hdl, const char *path,
1752    const uint64_t *redact_snap_guids, int num_redact_snaps,
1753    char **bookname)
1754{
1755	char errbuf[ERRBUFLEN];
1756	nvlist_t *bmarks;
1757
1758	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1759	    "cannot resume send"));
1760
1761	int error = get_bookmarks(path, &bmarks);
1762	if (error != 0) {
1763		if (error == ESRCH) {
1764			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1765			    "nonexistent redaction bookmark provided"));
1766		} else if (error == ENOENT) {
1767			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1768			    "dataset to be sent no longer exists"));
1769		} else {
1770			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1771			    "unknown error: %s"), zfs_strerror(error));
1772		}
1773		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1774	}
1775	nvpair_t *pair = find_redact_pair(bmarks, redact_snap_guids,
1776	    num_redact_snaps);
1777	if (pair == NULL)  {
1778		fnvlist_free(bmarks);
1779		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1780		    "no appropriate redaction bookmark exists"));
1781		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1782	}
1783	boolean_t complete = get_redact_complete(pair);
1784	if (!complete) {
1785		fnvlist_free(bmarks);
1786		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1787		    "incomplete redaction bookmark provided"));
1788		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1789	}
1790	*bookname = strndup(nvpair_name(pair), ZFS_MAX_DATASET_NAME_LEN);
1791	ASSERT3P(*bookname, !=, NULL);
1792	fnvlist_free(bmarks);
1793	return (0);
1794}
1795
1796static enum lzc_send_flags
1797lzc_flags_from_resume_nvl(nvlist_t *resume_nvl)
1798{
1799	enum lzc_send_flags lzc_flags = 0;
1800
1801	if (nvlist_exists(resume_nvl, "largeblockok"))
1802		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1803	if (nvlist_exists(resume_nvl, "embedok"))
1804		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1805	if (nvlist_exists(resume_nvl, "compressok"))
1806		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1807	if (nvlist_exists(resume_nvl, "rawok"))
1808		lzc_flags |= LZC_SEND_FLAG_RAW;
1809	if (nvlist_exists(resume_nvl, "savedok"))
1810		lzc_flags |= LZC_SEND_FLAG_SAVED;
1811
1812	return (lzc_flags);
1813}
1814
1815static int
1816zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
1817    int outfd, nvlist_t *resume_nvl)
1818{
1819	char errbuf[ERRBUFLEN];
1820	const char *toname;
1821	const char *fromname = NULL;
1822	uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1823	zfs_handle_t *zhp;
1824	int error = 0;
1825	char name[ZFS_MAX_DATASET_NAME_LEN];
1826	FILE *fout = (flags->verbosity > 0 && flags->dryrun) ? stdout : stderr;
1827	uint64_t *redact_snap_guids = NULL;
1828	int num_redact_snaps = 0;
1829	char *redact_book = NULL;
1830	uint64_t size = 0;
1831
1832	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1833	    "cannot resume send"));
1834
1835	if (flags->verbosity != 0) {
1836		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1837		    "resume token contents:\n"));
1838		nvlist_print(fout, resume_nvl);
1839	}
1840
1841	if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1842	    nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1843	    nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1844	    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1845	    nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1846		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1847		    "resume token is corrupt"));
1848		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1849	}
1850	fromguid = 0;
1851	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1852
1853	if (flags->saved) {
1854		(void) strlcpy(name, toname, sizeof (name));
1855	} else {
1856		error = guid_to_name(hdl, toname, toguid, B_FALSE, name);
1857		if (error != 0) {
1858			if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1859				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1860				    "'%s' is no longer the same snapshot "
1861				    "used in the initial send"), toname);
1862			} else {
1863				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1864				    "'%s' used in the initial send no "
1865				    "longer exists"), toname);
1866			}
1867			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1868		}
1869	}
1870
1871	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1872	if (zhp == NULL) {
1873		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1874		    "unable to access '%s'"), name);
1875		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1876	}
1877
1878	if (nvlist_lookup_uint64_array(resume_nvl, "book_redact_snaps",
1879	    &redact_snap_guids, (uint_t *)&num_redact_snaps) != 0) {
1880		num_redact_snaps = -1;
1881	}
1882
1883	if (fromguid != 0) {
1884		if (guid_to_name_redact_snaps(hdl, toname, fromguid, B_TRUE,
1885		    redact_snap_guids, num_redact_snaps, name) != 0) {
1886			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1887			    "incremental source %#llx no longer exists"),
1888			    (longlong_t)fromguid);
1889			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1890		}
1891		fromname = name;
1892	}
1893
1894	redact_snap_guids = NULL;
1895
1896	if (nvlist_lookup_uint64_array(resume_nvl,
1897	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &redact_snap_guids,
1898	    (uint_t *)&num_redact_snaps) == 0) {
1899		char path[ZFS_MAX_DATASET_NAME_LEN];
1900
1901		(void) strlcpy(path, toname, sizeof (path));
1902		char *at = strchr(path, '@');
1903		ASSERT3P(at, !=, NULL);
1904
1905		*at = '\0';
1906
1907		if ((error = find_redact_book(hdl, path, redact_snap_guids,
1908		    num_redact_snaps, &redact_book)) != 0) {
1909			return (error);
1910		}
1911	}
1912
1913	enum lzc_send_flags lzc_flags = lzc_flags_from_sendflags(flags) |
1914	    lzc_flags_from_resume_nvl(resume_nvl);
1915
1916	if (flags->verbosity != 0 || flags->progressastitle) {
1917		/*
1918		 * Some of these may have come from the resume token, set them
1919		 * here for size estimate purposes.
1920		 */
1921		sendflags_t tmpflags = *flags;
1922		if (lzc_flags & LZC_SEND_FLAG_LARGE_BLOCK)
1923			tmpflags.largeblock = B_TRUE;
1924		if (lzc_flags & LZC_SEND_FLAG_COMPRESS)
1925			tmpflags.compress = B_TRUE;
1926		if (lzc_flags & LZC_SEND_FLAG_EMBED_DATA)
1927			tmpflags.embed_data = B_TRUE;
1928		if (lzc_flags & LZC_SEND_FLAG_RAW)
1929			tmpflags.raw = B_TRUE;
1930		if (lzc_flags & LZC_SEND_FLAG_SAVED)
1931			tmpflags.saved = B_TRUE;
1932		error = estimate_size(zhp, fromname, outfd, &tmpflags,
1933		    resumeobj, resumeoff, bytes, redact_book, errbuf, &size);
1934	}
1935
1936	if (!flags->dryrun) {
1937		progress_arg_t pa = { 0 };
1938		pthread_t tid;
1939		sigset_t oldmask;
1940		/*
1941		 * If progress reporting is requested, spawn a new thread to
1942		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1943		 */
1944		{
1945			pa.pa_zhp = zhp;
1946			pa.pa_fd = outfd;
1947			pa.pa_parsable = flags->parsable;
1948			pa.pa_estimate = B_FALSE;
1949			pa.pa_verbosity = flags->verbosity;
1950			pa.pa_size = size;
1951			pa.pa_astitle = flags->progressastitle;
1952			pa.pa_progress = flags->progress;
1953
1954			error = pthread_create(&tid, NULL,
1955			    send_progress_thread, &pa);
1956			if (error != 0) {
1957				if (redact_book != NULL)
1958					free(redact_book);
1959				zfs_close(zhp);
1960				return (error);
1961			}
1962			SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1963		}
1964
1965		error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
1966		    lzc_flags, resumeobj, resumeoff, redact_book);
1967		if (redact_book != NULL)
1968			free(redact_book);
1969
1970		if (send_progress_thread_exit(hdl, tid, &oldmask)) {
1971			zfs_close(zhp);
1972			return (-1);
1973		}
1974
1975		char errbuf[ERRBUFLEN];
1976		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1977		    "warning: cannot send '%s'"), zhp->zfs_name);
1978
1979		zfs_close(zhp);
1980
1981		switch (error) {
1982		case 0:
1983			return (0);
1984		case EACCES:
1985			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1986			    "source key must be loaded"));
1987			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1988		case ESRCH:
1989			if (lzc_exists(zhp->zfs_name)) {
1990				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1991				    "incremental source could not be found"));
1992			}
1993			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1994
1995		case EXDEV:
1996		case ENOENT:
1997		case EDQUOT:
1998		case EFBIG:
1999		case EIO:
2000		case ENOLINK:
2001		case ENOSPC:
2002		case ENOSTR:
2003		case ENXIO:
2004		case EPIPE:
2005		case ERANGE:
2006		case EFAULT:
2007		case EROFS:
2008			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
2009			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2010
2011		default:
2012			return (zfs_standard_error(hdl, errno, errbuf));
2013		}
2014	} else {
2015		if (redact_book != NULL)
2016			free(redact_book);
2017	}
2018
2019	zfs_close(zhp);
2020
2021	return (error);
2022}
2023
/*
 * Argument bundle handed through lzc_send_wrapper() to
 * zfs_send_resume_impl_cb(), which forwards the fields unchanged to
 * zfs_send_resume_impl_cb_impl().
 */
struct zfs_send_resume_impl {
	libzfs_handle_t *hdl;		/* library handle */
	sendflags_t *flags;		/* caller's send flags */
	nvlist_t *resume_nvl;		/* decoded resume token */
};
2029
2030static int
2031zfs_send_resume_impl_cb(int outfd, void *arg)
2032{
2033	struct zfs_send_resume_impl *zsri = arg;
2034	return (zfs_send_resume_impl_cb_impl(zsri->hdl, zsri->flags, outfd,
2035	    zsri->resume_nvl));
2036}
2037
2038static int
2039zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
2040    nvlist_t *resume_nvl)
2041{
2042	struct zfs_send_resume_impl zsri = {
2043		.hdl = hdl,
2044		.flags = flags,
2045		.resume_nvl = resume_nvl,
2046	};
2047	return (lzc_send_wrapper(zfs_send_resume_impl_cb, outfd, &zsri));
2048}
2049
2050int
2051zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
2052    const char *resume_token)
2053{
2054	int ret;
2055	char errbuf[ERRBUFLEN];
2056	nvlist_t *resume_nvl;
2057
2058	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2059	    "cannot resume send"));
2060
2061	resume_nvl = zfs_send_resume_token_to_nvlist(hdl, resume_token);
2062	if (resume_nvl == NULL) {
2063		/*
2064		 * zfs_error_aux has already been set by
2065		 * zfs_send_resume_token_to_nvlist()
2066		 */
2067		return (zfs_error(hdl, EZFS_FAULT, errbuf));
2068	}
2069
2070	ret = zfs_send_resume_impl(hdl, flags, outfd, resume_nvl);
2071	fnvlist_free(resume_nvl);
2072
2073	return (ret);
2074}
2075
2076int
2077zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
2078    const char *resume_token)
2079{
2080	int ret;
2081	libzfs_handle_t *hdl = zhp->zfs_hdl;
2082	nvlist_t *saved_nvl = NULL, *resume_nvl = NULL;
2083	uint64_t saved_guid = 0, resume_guid = 0;
2084	uint64_t obj = 0, off = 0, bytes = 0;
2085	char token_buf[ZFS_MAXPROPLEN];
2086	char errbuf[ERRBUFLEN];
2087
2088	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2089	    "saved send failed"));
2090
2091	ret = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
2092	    token_buf, sizeof (token_buf), NULL, NULL, 0, B_TRUE);
2093	if (ret != 0)
2094		goto out;
2095
2096	saved_nvl = zfs_send_resume_token_to_nvlist(hdl, token_buf);
2097	if (saved_nvl == NULL) {
2098		/*
2099		 * zfs_error_aux has already been set by
2100		 * zfs_send_resume_token_to_nvlist()
2101		 */
2102		ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2103		goto out;
2104	}
2105
2106	/*
2107	 * If a resume token is provided we use the object and offset
2108	 * from that instead of the default, which starts from the
2109	 * beginning.
2110	 */
2111	if (resume_token != NULL) {
2112		resume_nvl = zfs_send_resume_token_to_nvlist(hdl,
2113		    resume_token);
2114		if (resume_nvl == NULL) {
2115			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2116			goto out;
2117		}
2118
2119		if (nvlist_lookup_uint64(resume_nvl, "object", &obj) != 0 ||
2120		    nvlist_lookup_uint64(resume_nvl, "offset", &off) != 0 ||
2121		    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
2122		    nvlist_lookup_uint64(resume_nvl, "toguid",
2123		    &resume_guid) != 0) {
2124			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2125			    "provided resume token is corrupt"));
2126			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2127			goto out;
2128		}
2129
2130		if (nvlist_lookup_uint64(saved_nvl, "toguid",
2131		    &saved_guid)) {
2132			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2133			    "dataset's resume token is corrupt"));
2134			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2135			goto out;
2136		}
2137
2138		if (resume_guid != saved_guid) {
2139			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2140			    "provided resume token does not match dataset"));
2141			ret = zfs_error(hdl, EZFS_BADBACKUP, errbuf);
2142			goto out;
2143		}
2144	}
2145
2146	(void) nvlist_remove_all(saved_nvl, "object");
2147	fnvlist_add_uint64(saved_nvl, "object", obj);
2148
2149	(void) nvlist_remove_all(saved_nvl, "offset");
2150	fnvlist_add_uint64(saved_nvl, "offset", off);
2151
2152	(void) nvlist_remove_all(saved_nvl, "bytes");
2153	fnvlist_add_uint64(saved_nvl, "bytes", bytes);
2154
2155	(void) nvlist_remove_all(saved_nvl, "toname");
2156	fnvlist_add_string(saved_nvl, "toname", zhp->zfs_name);
2157
2158	ret = zfs_send_resume_impl(hdl, flags, outfd, saved_nvl);
2159
2160out:
2161	fnvlist_free(saved_nvl);
2162	fnvlist_free(resume_nvl);
2163	return (ret);
2164}
2165
2166/*
2167 * This function informs the target system that the recursive send is complete.
2168 * The record is also expected in the case of a send -p.
2169 */
2170static int
2171send_conclusion_record(int fd, zio_cksum_t *zc)
2172{
2173	dmu_replay_record_t drr = { 0 };
2174	drr.drr_type = DRR_END;
2175	if (zc != NULL)
2176		drr.drr_u.drr_end.drr_checksum = *zc;
2177	if (write(fd, &drr, sizeof (drr)) == -1) {
2178		return (errno);
2179	}
2180	return (0);
2181}
2182
2183/*
2184 * This function is responsible for sending the records that contain the
2185 * necessary information for the target system's libzfs to be able to set the
2186 * properties of the filesystem being received, or to be able to prepare for
2187 * a recursive receive.
2188 *
2189 * The "zhp" argument is the handle of the snapshot we are sending
2190 * (the "tosnap").  The "from" argument is the short snapshot name (the part
2191 * after the @) of the incremental source.
2192 */
2193static int
2194send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
2195    boolean_t gather_props, boolean_t recursive, boolean_t verbose,
2196    boolean_t dryrun, boolean_t raw, boolean_t replicate, boolean_t skipmissing,
2197    boolean_t backup, boolean_t holds, boolean_t props, boolean_t doall,
2198    nvlist_t **fssp, avl_tree_t **fsavlp)
2199{
2200	int err = 0;
2201	char *packbuf = NULL;
2202	size_t buflen = 0;
2203	zio_cksum_t zc = { {0} };
2204	int featureflags = 0;
2205	/* name of filesystem/volume that contains snapshot we are sending */
2206	char tofs[ZFS_MAX_DATASET_NAME_LEN];
2207	/* short name of snap we are sending */
2208	const char *tosnap = "";
2209
2210	char errbuf[ERRBUFLEN];
2211	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2212	    "warning: cannot send '%s'"), zhp->zfs_name);
2213	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
2214	    ZFS_PROP_VERSION) >= ZPL_VERSION_SA) {
2215		featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
2216	}
2217
2218	if (holds)
2219		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
2220
2221	(void) strlcpy(tofs, zhp->zfs_name, ZFS_MAX_DATASET_NAME_LEN);
2222	char *at = strchr(tofs, '@');
2223	if (at != NULL) {
2224		*at = '\0';
2225		tosnap = at + 1;
2226	}
2227
2228	if (gather_props) {
2229		nvlist_t *hdrnv = fnvlist_alloc();
2230		nvlist_t *fss = NULL;
2231
2232		if (from != NULL)
2233			fnvlist_add_string(hdrnv, "fromsnap", from);
2234		fnvlist_add_string(hdrnv, "tosnap", tosnap);
2235		if (!recursive)
2236			fnvlist_add_boolean(hdrnv, "not_recursive");
2237
2238		if (raw) {
2239			fnvlist_add_boolean(hdrnv, "raw");
2240		}
2241
2242		if (gather_nvlist(zhp->zfs_hdl, tofs,
2243		    from, tosnap, recursive, raw, doall, replicate, skipmissing,
2244		    verbose, backup, holds, props, &fss, fsavlp) != 0) {
2245			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2246			    errbuf));
2247		}
2248		/*
2249		 * Do not allow the size of the properties list to exceed
2250		 * the limit
2251		 */
2252		if ((fnvlist_size(fss) + fnvlist_size(hdrnv)) >
2253		    zhp->zfs_hdl->libzfs_max_nvlist) {
2254			(void) snprintf(errbuf, sizeof (errbuf),
2255			    dgettext(TEXT_DOMAIN, "warning: cannot send '%s': "
2256			    "the size of the list of snapshots and properties "
2257			    "is too large to be received successfully.\n"
2258			    "Select a smaller number of snapshots to send.\n"),
2259			    zhp->zfs_name);
2260			return (zfs_error(zhp->zfs_hdl, EZFS_NOSPC,
2261			    errbuf));
2262		}
2263		fnvlist_add_nvlist(hdrnv, "fss", fss);
2264		VERIFY0(nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR,
2265		    0));
2266		if (fssp != NULL) {
2267			*fssp = fss;
2268		} else {
2269			fnvlist_free(fss);
2270		}
2271		fnvlist_free(hdrnv);
2272	}
2273
2274	if (!dryrun) {
2275		dmu_replay_record_t drr = { 0 };
2276		/* write first begin record */
2277		drr.drr_type = DRR_BEGIN;
2278		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
2279		DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
2280		    drr_versioninfo, DMU_COMPOUNDSTREAM);
2281		DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
2282		    drr_versioninfo, featureflags);
2283		if (snprintf(drr.drr_u.drr_begin.drr_toname,
2284		    sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", tofs,
2285		    tosnap) >= sizeof (drr.drr_u.drr_begin.drr_toname)) {
2286			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2287			    errbuf));
2288		}
2289		drr.drr_payloadlen = buflen;
2290
2291		err = dump_record(&drr, packbuf, buflen, &zc, fd);
2292		free(packbuf);
2293		if (err != 0) {
2294			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
2295			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2296			    errbuf));
2297		}
2298		err = send_conclusion_record(fd, &zc);
2299		if (err != 0) {
2300			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
2301			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2302			    errbuf));
2303		}
2304	}
2305	return (0);
2306}
2307
2308/*
2309 * Generate a send stream.  The "zhp" argument is the filesystem/volume
2310 * that contains the snapshot to send.  The "fromsnap" argument is the
2311 * short name (the part after the '@') of the snapshot that is the
2312 * incremental source to send from (if non-NULL).  The "tosnap" argument
2313 * is the short name of the snapshot to send.
2314 *
2315 * The content of the send stream is the snapshot identified by
2316 * 'tosnap'.  Incremental streams are requested in two ways:
2317 *     - from the snapshot identified by "fromsnap" (if non-null) or
2318 *     - from the origin of the dataset identified by zhp, which must
2319 *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
2320 *	 is TRUE.
2321 *
2322 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
2323 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
2324 * if "replicate" is set.  If "doall" is set, dump all the intermediate
2325 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
2326 * case too. If "props" is set, send properties.
2327 *
2328 * Pre-wrapped (cf. lzc_send_wrapper()).
2329 */
2330static int
2331zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2332    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2333    void *cb_arg, nvlist_t **debugnvp)
2334{
2335	char errbuf[ERRBUFLEN];
2336	send_dump_data_t sdd = { 0 };
2337	int err = 0;
2338	nvlist_t *fss = NULL;
2339	avl_tree_t *fsavl = NULL;
2340	static uint64_t holdseq;
2341	int spa_version;
2342	FILE *fout;
2343
2344	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2345	    "cannot send '%s'"), zhp->zfs_name);
2346
2347	if (fromsnap && fromsnap[0] == '\0') {
2348		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2349		    "zero-length incremental source"));
2350		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
2351	}
2352
2353	if (fromsnap) {
2354		char full_fromsnap_name[ZFS_MAX_DATASET_NAME_LEN];
2355		if (snprintf(full_fromsnap_name, sizeof (full_fromsnap_name),
2356		    "%s@%s", zhp->zfs_name, fromsnap) >=
2357		    sizeof (full_fromsnap_name)) {
2358			err = EINVAL;
2359			goto stderr_out;
2360		}
2361		zfs_handle_t *fromsnapn = zfs_open(zhp->zfs_hdl,
2362		    full_fromsnap_name, ZFS_TYPE_SNAPSHOT);
2363		if (fromsnapn == NULL) {
2364			err = -1;
2365			goto err_out;
2366		}
2367		zfs_close(fromsnapn);
2368	}
2369
2370	if (flags->replicate || flags->doall || flags->props ||
2371	    flags->holds || flags->backup) {
2372		char full_tosnap_name[ZFS_MAX_DATASET_NAME_LEN];
2373		if (snprintf(full_tosnap_name, sizeof (full_tosnap_name),
2374		    "%s@%s", zhp->zfs_name, tosnap) >=
2375		    sizeof (full_tosnap_name)) {
2376			err = EINVAL;
2377			goto stderr_out;
2378		}
2379		zfs_handle_t *tosnap = zfs_open(zhp->zfs_hdl,
2380		    full_tosnap_name, ZFS_TYPE_SNAPSHOT);
2381		if (tosnap == NULL) {
2382			err = -1;
2383			goto err_out;
2384		}
2385		err = send_prelim_records(tosnap, fromsnap, outfd,
2386		    flags->replicate || flags->props || flags->holds,
2387		    flags->replicate, flags->verbosity > 0, flags->dryrun,
2388		    flags->raw, flags->replicate, flags->skipmissing,
2389		    flags->backup, flags->holds, flags->props, flags->doall,
2390		    &fss, &fsavl);
2391		zfs_close(tosnap);
2392		if (err != 0)
2393			goto err_out;
2394	}
2395
2396	/* dump each stream */
2397	sdd.fromsnap = fromsnap;
2398	sdd.tosnap = tosnap;
2399	sdd.outfd = outfd;
2400	sdd.replicate = flags->replicate;
2401	sdd.doall = flags->doall;
2402	sdd.fromorigin = flags->fromorigin;
2403	sdd.fss = fss;
2404	sdd.fsavl = fsavl;
2405	sdd.verbosity = flags->verbosity;
2406	sdd.parsable = flags->parsable;
2407	sdd.progress = flags->progress;
2408	sdd.progressastitle = flags->progressastitle;
2409	sdd.dryrun = flags->dryrun;
2410	sdd.large_block = flags->largeblock;
2411	sdd.embed_data = flags->embed_data;
2412	sdd.compress = flags->compress;
2413	sdd.raw = flags->raw;
2414	sdd.holds = flags->holds;
2415	sdd.filter_cb = filter_func;
2416	sdd.filter_cb_arg = cb_arg;
2417	if (debugnvp)
2418		sdd.debugnv = *debugnvp;
2419	if (sdd.verbosity != 0 && sdd.dryrun)
2420		sdd.std_out = B_TRUE;
2421	fout = sdd.std_out ? stdout : stderr;
2422
2423	/*
2424	 * Some flags require that we place user holds on the datasets that are
2425	 * being sent so they don't get destroyed during the send. We can skip
2426	 * this step if the pool is imported read-only since the datasets cannot
2427	 * be destroyed.
2428	 */
2429	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
2430	    ZPOOL_PROP_READONLY, NULL) &&
2431	    zfs_spa_version(zhp, &spa_version) == 0 &&
2432	    spa_version >= SPA_VERSION_USERREFS &&
2433	    (flags->doall || flags->replicate)) {
2434		++holdseq;
2435		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
2436		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
2437		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
2438		if (sdd.cleanup_fd < 0) {
2439			err = errno;
2440			goto stderr_out;
2441		}
2442		sdd.snapholds = fnvlist_alloc();
2443	} else {
2444		sdd.cleanup_fd = -1;
2445		sdd.snapholds = NULL;
2446	}
2447
2448	if (flags->verbosity != 0 || sdd.snapholds != NULL) {
2449		/*
2450		 * Do a verbose no-op dry run to get all the verbose output
2451		 * or to gather snapshot hold's before generating any data,
2452		 * then do a non-verbose real run to generate the streams.
2453		 */
2454		sdd.dryrun = B_TRUE;
2455		err = dump_filesystems(zhp, &sdd);
2456
2457		if (err != 0)
2458			goto stderr_out;
2459
2460		if (flags->verbosity != 0) {
2461			if (flags->parsable) {
2462				(void) fprintf(fout, "size\t%llu\n",
2463				    (longlong_t)sdd.size);
2464			} else {
2465				char buf[16];
2466				zfs_nicebytes(sdd.size, buf, sizeof (buf));
2467				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
2468				    "total estimated size is %s\n"), buf);
2469			}
2470		}
2471
2472		/* Ensure no snaps found is treated as an error. */
2473		if (!sdd.seento) {
2474			err = ENOENT;
2475			goto err_out;
2476		}
2477
2478		/* Skip the second run if dryrun was requested. */
2479		if (flags->dryrun)
2480			goto err_out;
2481
2482		if (sdd.snapholds != NULL) {
2483			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
2484			if (err != 0)
2485				goto stderr_out;
2486
2487			fnvlist_free(sdd.snapholds);
2488			sdd.snapholds = NULL;
2489		}
2490
2491		sdd.dryrun = B_FALSE;
2492		sdd.verbosity = 0;
2493	}
2494
2495	err = dump_filesystems(zhp, &sdd);
2496	fsavl_destroy(fsavl);
2497	fnvlist_free(fss);
2498
2499	/* Ensure no snaps found is treated as an error. */
2500	if (err == 0 && !sdd.seento)
2501		err = ENOENT;
2502
2503	if (sdd.cleanup_fd != -1) {
2504		VERIFY(0 == close(sdd.cleanup_fd));
2505		sdd.cleanup_fd = -1;
2506	}
2507
2508	if (!flags->dryrun && (flags->replicate || flags->doall ||
2509	    flags->props || flags->backup || flags->holds)) {
2510		/*
2511		 * write final end record.  NB: want to do this even if
2512		 * there was some error, because it might not be totally
2513		 * failed.
2514		 */
2515		int err2 = send_conclusion_record(outfd, NULL);
2516		if (err2 != 0)
2517			return (zfs_standard_error(zhp->zfs_hdl, err2, errbuf));
2518	}
2519
2520	return (err || sdd.err);
2521
2522stderr_out:
2523	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2524err_out:
2525	fsavl_destroy(fsavl);
2526	fnvlist_free(fss);
2527	fnvlist_free(sdd.snapholds);
2528
2529	if (sdd.cleanup_fd != -1)
2530		VERIFY(0 == close(sdd.cleanup_fd));
2531	return (err);
2532}
2533
/*
 * Argument bundle that zfs_send() passes to zfs_send_cb() through
 * lzc_send_wrapper()'s opaque pointer.  The members carry zfs_send()'s
 * parameters of the same names; the output descriptor is not stored here
 * because the wrapper hands it to the callback separately.
 */
struct zfs_send {
	zfs_handle_t *zhp;
	const char *fromsnap;
	const char *tosnap;
	sendflags_t *flags;
	snapfilter_cb_t *filter_func;
	void *cb_arg;
	nvlist_t **debugnvp;
};
2543
2544static int
2545zfs_send_cb(int outfd, void *arg)
2546{
2547	struct zfs_send *zs = arg;
2548	return (zfs_send_cb_impl(zs->zhp, zs->fromsnap, zs->tosnap, zs->flags,
2549	    outfd, zs->filter_func, zs->cb_arg, zs->debugnvp));
2550}
2551
2552int
2553zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2554    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2555    void *cb_arg, nvlist_t **debugnvp)
2556{
2557	struct zfs_send arg = {
2558		.zhp = zhp,
2559		.fromsnap = fromsnap,
2560		.tosnap = tosnap,
2561		.flags = flags,
2562		.filter_func = filter_func,
2563		.cb_arg = cb_arg,
2564		.debugnvp = debugnvp,
2565	};
2566	return (lzc_send_wrapper(zfs_send_cb, outfd, &arg));
2567}
2568
2569
2570static zfs_handle_t *
2571name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
2572{
2573	char dirname[ZFS_MAX_DATASET_NAME_LEN];
2574	(void) strlcpy(dirname, snapname, ZFS_MAX_DATASET_NAME_LEN);
2575	char *c = strchr(dirname, '@');
2576	if (c != NULL)
2577		*c = '\0';
2578	return (zfs_open(hdl, dirname, ZFS_TYPE_DATASET));
2579}
2580
2581/*
2582 * Returns B_TRUE if earlier is an earlier snapshot in later's timeline; either
2583 * an earlier snapshot in the same filesystem, or a snapshot before later's
2584 * origin, or it's origin's origin, etc.
2585 */
2586static boolean_t
2587snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
2588{
2589	boolean_t ret;
2590	uint64_t later_txg =
2591	    (later->zfs_type == ZFS_TYPE_FILESYSTEM ||
2592	    later->zfs_type == ZFS_TYPE_VOLUME ?
2593	    UINT64_MAX : zfs_prop_get_int(later, ZFS_PROP_CREATETXG));
2594	uint64_t earlier_txg = zfs_prop_get_int(earlier, ZFS_PROP_CREATETXG);
2595
2596	if (earlier_txg >= later_txg)
2597		return (B_FALSE);
2598
2599	zfs_handle_t *earlier_dir = name_to_dir_handle(earlier->zfs_hdl,
2600	    earlier->zfs_name);
2601	zfs_handle_t *later_dir = name_to_dir_handle(later->zfs_hdl,
2602	    later->zfs_name);
2603
2604	if (strcmp(earlier_dir->zfs_name, later_dir->zfs_name) == 0) {
2605		zfs_close(earlier_dir);
2606		zfs_close(later_dir);
2607		return (B_TRUE);
2608	}
2609
2610	char clonename[ZFS_MAX_DATASET_NAME_LEN];
2611	if (zfs_prop_get(later_dir, ZFS_PROP_ORIGIN, clonename,
2612	    ZFS_MAX_DATASET_NAME_LEN, NULL, NULL, 0, B_TRUE) != 0) {
2613		zfs_close(earlier_dir);
2614		zfs_close(later_dir);
2615		return (B_FALSE);
2616	}
2617
2618	zfs_handle_t *origin = zfs_open(earlier->zfs_hdl, clonename,
2619	    ZFS_TYPE_DATASET);
2620	uint64_t origin_txg = zfs_prop_get_int(origin, ZFS_PROP_CREATETXG);
2621
2622	/*
2623	 * If "earlier" is exactly the origin, then
2624	 * snapshot_is_before(earlier, origin) will return false (because
2625	 * they're the same).
2626	 */
2627	if (origin_txg == earlier_txg &&
2628	    strcmp(origin->zfs_name, earlier->zfs_name) == 0) {
2629		zfs_close(earlier_dir);
2630		zfs_close(later_dir);
2631		zfs_close(origin);
2632		return (B_TRUE);
2633	}
2634	zfs_close(earlier_dir);
2635	zfs_close(later_dir);
2636
2637	ret = snapshot_is_before(earlier, origin);
2638	zfs_close(origin);
2639	return (ret);
2640}
2641
2642/*
2643 * The "zhp" argument is the handle of the dataset to send (typically a
2644 * snapshot).  The "from" argument is the full name of the snapshot or
2645 * bookmark that is the incremental source.
2646 *
2647 * Pre-wrapped (cf. lzc_send_wrapper()).
2648 */
2649static int
2650zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
2651    sendflags_t *flags, const char *redactbook)
2652{
2653	int err;
2654	libzfs_handle_t *hdl = zhp->zfs_hdl;
2655	char *name = zhp->zfs_name;
2656	pthread_t ptid;
2657	progress_arg_t pa = { 0 };
2658	uint64_t size = 0;
2659
2660	char errbuf[ERRBUFLEN];
2661	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2662	    "warning: cannot send '%s'"), name);
2663
2664	if (from != NULL && strchr(from, '@')) {
2665		zfs_handle_t *from_zhp = zfs_open(hdl, from,
2666		    ZFS_TYPE_DATASET);
2667		if (from_zhp == NULL)
2668			return (-1);
2669		if (!snapshot_is_before(from_zhp, zhp)) {
2670			zfs_close(from_zhp);
2671			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2672			    "not an earlier snapshot from the same fs"));
2673			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2674		}
2675		zfs_close(from_zhp);
2676	}
2677
2678	if (redactbook != NULL) {
2679		char bookname[ZFS_MAX_DATASET_NAME_LEN];
2680		nvlist_t *redact_snaps;
2681		zfs_handle_t *book_zhp;
2682		char *at, *pound;
2683		int dsnamelen;
2684
2685		pound = strchr(redactbook, '#');
2686		if (pound != NULL)
2687			redactbook = pound + 1;
2688		at = strchr(name, '@');
2689		if (at == NULL) {
2690			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2691			    "cannot do a redacted send to a filesystem"));
2692			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2693		}
2694		dsnamelen = at - name;
2695		if (snprintf(bookname, sizeof (bookname), "%.*s#%s",
2696		    dsnamelen, name, redactbook)
2697		    >= sizeof (bookname)) {
2698			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2699			    "invalid bookmark name"));
2700			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2701		}
2702		book_zhp = zfs_open(hdl, bookname, ZFS_TYPE_BOOKMARK);
2703		if (book_zhp == NULL)
2704			return (-1);
2705		if (nvlist_lookup_nvlist(book_zhp->zfs_props,
2706		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
2707		    &redact_snaps) != 0 || redact_snaps == NULL) {
2708			zfs_close(book_zhp);
2709			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2710			    "not a redaction bookmark"));
2711			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2712		}
2713		zfs_close(book_zhp);
2714	}
2715
2716	/*
2717	 * Send fs properties
2718	 */
2719	if (flags->props || flags->holds || flags->backup) {
2720		/*
2721		 * Note: the header generated by send_prelim_records()
2722		 * assumes that the incremental source is in the same
2723		 * filesystem/volume as the target (which is a requirement
2724		 * when doing "zfs send -R").  But that isn't always the
2725		 * case here (e.g. send from snap in origin, or send from
2726		 * bookmark).  We pass from=NULL, which will omit this
2727		 * information from the prelim records; it isn't used
2728		 * when receiving this type of stream.
2729		 */
2730		err = send_prelim_records(zhp, NULL, fd, B_TRUE, B_FALSE,
2731		    flags->verbosity > 0, flags->dryrun, flags->raw,
2732		    flags->replicate, B_FALSE, flags->backup, flags->holds,
2733		    flags->props, flags->doall, NULL, NULL);
2734		if (err != 0)
2735			return (err);
2736	}
2737
2738	/*
2739	 * Perform size estimate if verbose was specified.
2740	 */
2741	if (flags->verbosity != 0 || flags->progressastitle) {
2742		err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook,
2743		    errbuf, &size);
2744		if (err != 0)
2745			return (err);
2746	}
2747
2748	if (flags->dryrun)
2749		return (0);
2750
2751	/*
2752	 * If progress reporting is requested, spawn a new thread to poll
2753	 * ZFS_IOC_SEND_PROGRESS at a regular interval.
2754	 */
2755	sigset_t oldmask;
2756	{
2757		pa.pa_zhp = zhp;
2758		pa.pa_fd = fd;
2759		pa.pa_parsable = flags->parsable;
2760		pa.pa_estimate = B_FALSE;
2761		pa.pa_verbosity = flags->verbosity;
2762		pa.pa_size = size;
2763		pa.pa_astitle = flags->progressastitle;
2764		pa.pa_progress = flags->progress;
2765
2766		err = pthread_create(&ptid, NULL,
2767		    send_progress_thread, &pa);
2768		if (err != 0) {
2769			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(errno));
2770			return (zfs_error(zhp->zfs_hdl,
2771			    EZFS_THREADCREATEFAILED, errbuf));
2772		}
2773		SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
2774	}
2775
2776	err = lzc_send_redacted(name, from, fd,
2777	    lzc_flags_from_sendflags(flags), redactbook);
2778
2779	if (send_progress_thread_exit(hdl, ptid, &oldmask))
2780			return (-1);
2781
2782	if (err == 0 && (flags->props || flags->holds || flags->backup)) {
2783		/* Write the final end record. */
2784		err = send_conclusion_record(fd, NULL);
2785		if (err != 0)
2786			return (zfs_standard_error(hdl, err, errbuf));
2787	}
2788	if (err != 0) {
2789		switch (errno) {
2790		case EXDEV:
2791			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2792			    "not an earlier snapshot from the same fs"));
2793			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2794
2795		case ENOENT:
2796		case ESRCH:
2797			if (lzc_exists(name)) {
2798				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2799				    "incremental source (%s) does not exist"),
2800				    from);
2801			}
2802			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2803
2804		case EACCES:
2805			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2806			    "dataset key must be loaded"));
2807			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2808
2809		case EBUSY:
2810			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2811			    "target is busy; if a filesystem, "
2812			    "it must not be mounted"));
2813			return (zfs_error(hdl, EZFS_BUSY, errbuf));
2814
2815		case EDQUOT:
2816		case EFAULT:
2817		case EFBIG:
2818		case EINVAL:
2819		case EIO:
2820		case ENOLINK:
2821		case ENOSPC:
2822		case ENOSTR:
2823		case ENXIO:
2824		case EPIPE:
2825		case ERANGE:
2826		case EROFS:
2827			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
2828			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2829
2830		default:
2831			return (zfs_standard_error(hdl, errno, errbuf));
2832		}
2833	}
2834	return (err != 0);
2835}
2836
/*
 * Argument bundle that zfs_send_one() passes to zfs_send_one_cb() through
 * lzc_send_wrapper()'s opaque pointer.  The members carry zfs_send_one()'s
 * parameters of the same names; the output descriptor is supplied to the
 * callback by the wrapper instead of being stored here.
 */
struct zfs_send_one {
	zfs_handle_t *zhp;
	const char *from;
	sendflags_t *flags;
	const char *redactbook;
};
2843
2844static int
2845zfs_send_one_cb(int fd, void *arg)
2846{
2847	struct zfs_send_one *zso = arg;
2848	return (zfs_send_one_cb_impl(zso->zhp, zso->from, fd, zso->flags,
2849	    zso->redactbook));
2850}
2851
2852int
2853zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
2854    const char *redactbook)
2855{
2856	struct zfs_send_one zso = {
2857		.zhp = zhp,
2858		.from = from,
2859		.flags = flags,
2860		.redactbook = redactbook,
2861	};
2862	return (lzc_send_wrapper(zfs_send_one_cb, fd, &zso));
2863}
2864
2865/*
2866 * Routines specific to "zfs recv"
2867 */
2868
2869static int
2870recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2871    boolean_t byteswap, zio_cksum_t *zc)
2872{
2873	char *cp = buf;
2874	int rv;
2875	int len = ilen;
2876
2877	do {
2878		rv = read(fd, cp, len);
2879		cp += rv;
2880		len -= rv;
2881	} while (rv > 0);
2882
2883	if (rv < 0 || len != 0) {
2884		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2885		    "failed to read from stream"));
2886		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2887		    "cannot receive")));
2888	}
2889
2890	if (zc) {
2891		if (byteswap)
2892			fletcher_4_incremental_byteswap(buf, ilen, zc);
2893		else
2894			fletcher_4_incremental_native(buf, ilen, zc);
2895	}
2896	return (0);
2897}
2898
2899static int
2900recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2901    boolean_t byteswap, zio_cksum_t *zc)
2902{
2903	char *buf;
2904	int err;
2905
2906	buf = zfs_alloc(hdl, len);
2907
2908	if (len > hdl->libzfs_max_nvlist) {
2909		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "nvlist too large"));
2910		free(buf);
2911		return (ENOMEM);
2912	}
2913
2914	err = recv_read(hdl, fd, buf, len, byteswap, zc);
2915	if (err != 0) {
2916		free(buf);
2917		return (err);
2918	}
2919
2920	err = nvlist_unpack(buf, len, nvp, 0);
2921	free(buf);
2922	if (err != 0) {
2923		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2924		    "stream (malformed nvlist)"));
2925		return (EINVAL);
2926	}
2927	return (0);
2928}
2929
2930/*
2931 * Returns the grand origin (origin of origin of origin...) of a given handle.
2932 * If this dataset is not a clone, it simply returns a copy of the original
2933 * handle.
2934 */
2935static zfs_handle_t *
2936recv_open_grand_origin(zfs_handle_t *zhp)
2937{
2938	char origin[ZFS_MAX_DATASET_NAME_LEN];
2939	zprop_source_t src;
2940	zfs_handle_t *ozhp = zfs_handle_dup(zhp);
2941
2942	while (ozhp != NULL) {
2943		if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
2944		    sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
2945			break;
2946
2947		(void) zfs_close(ozhp);
2948		ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
2949	}
2950
2951	return (ozhp);
2952}
2953
2954static int
2955recv_rename_impl(zfs_handle_t *zhp, const char *name, const char *newname)
2956{
2957	int err;
2958	zfs_handle_t *ozhp = NULL;
2959
2960	/*
2961	 * Attempt to rename the dataset. If it fails with EACCES we have
2962	 * attempted to rename the dataset outside of its encryption root.
2963	 * Force the dataset to become an encryption root and try again.
2964	 */
2965	err = lzc_rename(name, newname);
2966	if (err == EACCES) {
2967		ozhp = recv_open_grand_origin(zhp);
2968		if (ozhp == NULL) {
2969			err = ENOENT;
2970			goto out;
2971		}
2972
2973		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2974		    NULL, NULL, 0);
2975		if (err != 0)
2976			goto out;
2977
2978		err = lzc_rename(name, newname);
2979	}
2980
2981out:
2982	if (ozhp != NULL)
2983		zfs_close(ozhp);
2984	return (err);
2985}
2986
2987static int
2988recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2989    int baselen, char *newname, recvflags_t *flags)
2990{
2991	static int seq;
2992	int err;
2993	prop_changelist_t *clp = NULL;
2994	zfs_handle_t *zhp = NULL;
2995
2996	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2997	if (zhp == NULL) {
2998		err = -1;
2999		goto out;
3000	}
3001	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3002	    flags->force ? MS_FORCE : 0);
3003	if (clp == NULL) {
3004		err = -1;
3005		goto out;
3006	}
3007	err = changelist_prefix(clp);
3008	if (err)
3009		goto out;
3010
3011	if (tryname) {
3012		(void) strlcpy(newname, tryname, ZFS_MAX_DATASET_NAME_LEN);
3013		if (flags->verbose) {
3014			(void) printf("attempting rename %s to %s\n",
3015			    name, newname);
3016		}
3017		err = recv_rename_impl(zhp, name, newname);
3018		if (err == 0)
3019			changelist_rename(clp, name, tryname);
3020	} else {
3021		err = ENOENT;
3022	}
3023
3024	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
3025		seq++;
3026
3027		(void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
3028		    "%.*srecv-%u-%u", baselen, name, getpid(), seq);
3029
3030		if (flags->verbose) {
3031			(void) printf("failed - trying rename %s to %s\n",
3032			    name, newname);
3033		}
3034		err = recv_rename_impl(zhp, name, newname);
3035		if (err == 0)
3036			changelist_rename(clp, name, newname);
3037		if (err && flags->verbose) {
3038			(void) printf("failed (%u) - "
3039			    "will try again on next pass\n", errno);
3040		}
3041		err = EAGAIN;
3042	} else if (flags->verbose) {
3043		if (err == 0)
3044			(void) printf("success\n");
3045		else
3046			(void) printf("failed (%u)\n", errno);
3047	}
3048
3049	(void) changelist_postfix(clp);
3050
3051out:
3052	if (clp != NULL)
3053		changelist_free(clp);
3054	if (zhp != NULL)
3055		zfs_close(zhp);
3056
3057	return (err);
3058}
3059
3060static int
3061recv_promote(libzfs_handle_t *hdl, const char *fsname,
3062    const char *origin_fsname, recvflags_t *flags)
3063{
3064	int err;
3065	zfs_cmd_t zc = {"\0"};
3066	zfs_handle_t *zhp = NULL, *ozhp = NULL;
3067
3068	if (flags->verbose)
3069		(void) printf("promoting %s\n", fsname);
3070
3071	(void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
3072	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
3073
3074	/*
3075	 * Attempt to promote the dataset. If it fails with EACCES the
3076	 * promotion would cause this dataset to leave its encryption root.
3077	 * Force the origin to become an encryption root and try again.
3078	 */
3079	err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3080	if (err == EACCES) {
3081		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3082		if (zhp == NULL) {
3083			err = -1;
3084			goto out;
3085		}
3086
3087		ozhp = recv_open_grand_origin(zhp);
3088		if (ozhp == NULL) {
3089			err = -1;
3090			goto out;
3091		}
3092
3093		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
3094		    NULL, NULL, 0);
3095		if (err != 0)
3096			goto out;
3097
3098		err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3099	}
3100
3101out:
3102	if (zhp != NULL)
3103		zfs_close(zhp);
3104	if (ozhp != NULL)
3105		zfs_close(ozhp);
3106
3107	return (err);
3108}
3109
3110static int
3111recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
3112    char *newname, recvflags_t *flags)
3113{
3114	int err = 0;
3115	prop_changelist_t *clp;
3116	zfs_handle_t *zhp;
3117	boolean_t defer = B_FALSE;
3118	int spa_version;
3119
3120	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
3121	if (zhp == NULL)
3122		return (-1);
3123	zfs_type_t type = zfs_get_type(zhp);
3124	if (type == ZFS_TYPE_SNAPSHOT &&
3125	    zfs_spa_version(zhp, &spa_version) == 0 &&
3126	    spa_version >= SPA_VERSION_USERREFS)
3127		defer = B_TRUE;
3128	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3129	    flags->force ? MS_FORCE : 0);
3130	zfs_close(zhp);
3131	if (clp == NULL)
3132		return (-1);
3133
3134	err = changelist_prefix(clp);
3135	if (err)
3136		return (err);
3137
3138	if (flags->verbose)
3139		(void) printf("attempting destroy %s\n", name);
3140	if (type == ZFS_TYPE_SNAPSHOT) {
3141		nvlist_t *nv = fnvlist_alloc();
3142		fnvlist_add_boolean(nv, name);
3143		err = lzc_destroy_snaps(nv, defer, NULL);
3144		fnvlist_free(nv);
3145	} else {
3146		err = lzc_destroy(name);
3147	}
3148	if (err == 0) {
3149		if (flags->verbose)
3150			(void) printf("success\n");
3151		changelist_remove(clp, name);
3152	}
3153
3154	(void) changelist_postfix(clp);
3155	changelist_free(clp);
3156
3157	/*
3158	 * Deferred destroy might destroy the snapshot or only mark it to be
3159	 * destroyed later, and it returns success in either case.
3160	 */
3161	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
3162	    ZFS_TYPE_SNAPSHOT))) {
3163		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
3164	}
3165
3166	return (err);
3167}
3168
3169typedef struct guid_to_name_data {
3170	uint64_t guid;
3171	boolean_t bookmark_ok;
3172	char *name;
3173	char *skip;
3174	uint64_t *redact_snap_guids;
3175	uint64_t num_redact_snaps;
3176} guid_to_name_data_t;
3177
3178static boolean_t
3179redact_snaps_match(zfs_handle_t *zhp, guid_to_name_data_t *gtnd)
3180{
3181	uint64_t *bmark_snaps;
3182	uint_t bmark_num_snaps;
3183	nvlist_t *nvl;
3184	if (zhp->zfs_type != ZFS_TYPE_BOOKMARK)
3185		return (B_FALSE);
3186
3187	nvl = fnvlist_lookup_nvlist(zhp->zfs_props,
3188	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
3189	bmark_snaps = fnvlist_lookup_uint64_array(nvl, ZPROP_VALUE,
3190	    &bmark_num_snaps);
3191	if (bmark_num_snaps != gtnd->num_redact_snaps)
3192		return (B_FALSE);
3193	int i = 0;
3194	for (; i < bmark_num_snaps; i++) {
3195		int j = 0;
3196		for (; j < bmark_num_snaps; j++) {
3197			if (bmark_snaps[i] == gtnd->redact_snap_guids[j])
3198				break;
3199		}
3200		if (j == bmark_num_snaps)
3201			break;
3202	}
3203	return (i == bmark_num_snaps);
3204}
3205
3206static int
3207guid_to_name_cb(zfs_handle_t *zhp, void *arg)
3208{
3209	guid_to_name_data_t *gtnd = arg;
3210	const char *slash;
3211	int err;
3212
3213	if (gtnd->skip != NULL &&
3214	    (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
3215	    strcmp(slash + 1, gtnd->skip) == 0) {
3216		zfs_close(zhp);
3217		return (0);
3218	}
3219
3220	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid &&
3221	    (gtnd->num_redact_snaps == -1 || redact_snaps_match(zhp, gtnd))) {
3222		(void) strcpy(gtnd->name, zhp->zfs_name);
3223		zfs_close(zhp);
3224		return (EEXIST);
3225	}
3226
3227	err = zfs_iter_children_v2(zhp, 0, guid_to_name_cb, gtnd);
3228	if (err != EEXIST && gtnd->bookmark_ok)
3229		err = zfs_iter_bookmarks_v2(zhp, 0, guid_to_name_cb, gtnd);
3230	zfs_close(zhp);
3231	return (err);
3232}
3233
3234/*
3235 * Attempt to find the local dataset associated with this guid.  In the case of
3236 * multiple matches, we attempt to find the "best" match by searching
3237 * progressively larger portions of the hierarchy.  This allows one to send a
3238 * tree of datasets individually and guarantee that we will find the source
3239 * guid within that hierarchy, even if there are multiple matches elsewhere.
3240 *
3241 * If num_redact_snaps is not -1, we attempt to find a redaction bookmark with
3242 * the specified number of redaction snapshots.  If num_redact_snaps isn't 0 or
3243 * -1, then redact_snap_guids will be an array of the guids of the snapshots the
3244 * redaction bookmark was created with.  If num_redact_snaps is -1, then we will
3245 * attempt to find a snapshot or bookmark (if bookmark_ok is passed) with the
3246 * given guid.  Note that a redaction bookmark can be returned if
3247 * num_redact_snaps == -1.
3248 */
3249static int
3250guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
3251    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
3252    uint64_t num_redact_snaps, char *name)
3253{
3254	char pname[ZFS_MAX_DATASET_NAME_LEN];
3255	guid_to_name_data_t gtnd;
3256
3257	gtnd.guid = guid;
3258	gtnd.bookmark_ok = bookmark_ok;
3259	gtnd.name = name;
3260	gtnd.skip = NULL;
3261	gtnd.redact_snap_guids = redact_snap_guids;
3262	gtnd.num_redact_snaps = num_redact_snaps;
3263
3264	/*
3265	 * Search progressively larger portions of the hierarchy, starting
3266	 * with the filesystem specified by 'parent'.  This will
3267	 * select the "most local" version of the origin snapshot in the case
3268	 * that there are multiple matching snapshots in the system.
3269	 */
3270	(void) strlcpy(pname, parent, sizeof (pname));
3271	char *cp = strrchr(pname, '@');
3272	if (cp == NULL)
3273		cp = strchr(pname, '\0');
3274	for (; cp != NULL; cp = strrchr(pname, '/')) {
3275		/* Chop off the last component and open the parent */
3276		*cp = '\0';
3277		zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
3278
3279		if (zhp == NULL)
3280			continue;
3281		int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
3282		if (err != EEXIST)
3283			err = zfs_iter_children_v2(zhp, 0, guid_to_name_cb,
3284			    &gtnd);
3285		if (err != EEXIST && bookmark_ok)
3286			err = zfs_iter_bookmarks_v2(zhp, 0, guid_to_name_cb,
3287			    &gtnd);
3288		zfs_close(zhp);
3289		if (err == EEXIST)
3290			return (0);
3291
3292		/*
3293		 * Remember the last portion of the dataset so we skip it next
3294		 * time through (as we've already searched that portion of the
3295		 * hierarchy).
3296		 */
3297		gtnd.skip = strrchr(pname, '/') + 1;
3298	}
3299
3300	return (ENOENT);
3301}
3302
3303static int
3304guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
3305    boolean_t bookmark_ok, char *name)
3306{
3307	return (guid_to_name_redact_snaps(hdl, parent, guid, bookmark_ok, NULL,
3308	    -1, name));
3309}
3310
3311/*
3312 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
3313 * guid1 is after guid2.
3314 */
3315static int
3316created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
3317    uint64_t guid1, uint64_t guid2)
3318{
3319	nvlist_t *nvfs;
3320	const char *fsname = NULL, *snapname = NULL;
3321	char buf[ZFS_MAX_DATASET_NAME_LEN];
3322	int rv;
3323	zfs_handle_t *guid1hdl, *guid2hdl;
3324	uint64_t create1, create2;
3325
3326	if (guid2 == 0)
3327		return (0);
3328	if (guid1 == 0)
3329		return (1);
3330
3331	nvfs = fsavl_find(avl, guid1, &snapname);
3332	fsname = fnvlist_lookup_string(nvfs, "name");
3333	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3334	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3335	if (guid1hdl == NULL)
3336		return (-1);
3337
3338	nvfs = fsavl_find(avl, guid2, &snapname);
3339	fsname = fnvlist_lookup_string(nvfs, "name");
3340	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3341	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3342	if (guid2hdl == NULL) {
3343		zfs_close(guid1hdl);
3344		return (-1);
3345	}
3346
3347	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
3348	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
3349
3350	if (create1 < create2)
3351		rv = -1;
3352	else if (create1 > create2)
3353		rv = +1;
3354	else
3355		rv = 0;
3356
3357	zfs_close(guid1hdl);
3358	zfs_close(guid2hdl);
3359
3360	return (rv);
3361}
3362
3363/*
3364 * This function reestablishes the hierarchy of encryption roots after a
3365 * recursive incremental receive has completed. This must be done after the
3366 * second call to recv_incremental_replication() has renamed and promoted all
3367 * sent datasets to their final locations in the dataset hierarchy.
3368 */
3369static int
3370recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *top_zfs,
3371    nvlist_t *stream_nv)
3372{
3373	int err;
3374	nvpair_t *fselem = NULL;
3375	nvlist_t *stream_fss;
3376
3377	stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3378
3379	while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
3380		zfs_handle_t *zhp = NULL;
3381		uint64_t crypt;
3382		nvlist_t *snaps, *props, *stream_nvfs = NULL;
3383		nvpair_t *snapel = NULL;
3384		boolean_t is_encroot, is_clone, stream_encroot;
3385		char *cp;
3386		const char *stream_keylocation = NULL;
3387		char keylocation[MAXNAMELEN];
3388		char fsname[ZFS_MAX_DATASET_NAME_LEN];
3389
3390		keylocation[0] = '\0';
3391		stream_nvfs = fnvpair_value_nvlist(fselem);
3392		snaps = fnvlist_lookup_nvlist(stream_nvfs, "snaps");
3393		props = fnvlist_lookup_nvlist(stream_nvfs, "props");
3394		stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
3395
3396		/* find a snapshot from the stream that exists locally */
3397		err = ENOENT;
3398		while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
3399			uint64_t guid;
3400
3401			guid = fnvpair_value_uint64(snapel);
3402			err = guid_to_name(hdl, top_zfs, guid, B_FALSE,
3403			    fsname);
3404			if (err == 0)
3405				break;
3406		}
3407
3408		if (err != 0)
3409			continue;
3410
3411		cp = strchr(fsname, '@');
3412		if (cp != NULL)
3413			*cp = '\0';
3414
3415		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3416		if (zhp == NULL) {
3417			err = ENOENT;
3418			goto error;
3419		}
3420
3421		crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
3422		is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
3423		(void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
3424
3425		/* we don't need to do anything for unencrypted datasets */
3426		if (crypt == ZIO_CRYPT_OFF) {
3427			zfs_close(zhp);
3428			continue;
3429		}
3430
3431		/*
3432		 * If the dataset is flagged as an encryption root, was not
3433		 * received as a clone and is not currently an encryption root,
3434		 * force it to become one. Fixup the keylocation if necessary.
3435		 */
3436		if (stream_encroot) {
3437			if (!is_clone && !is_encroot) {
3438				err = lzc_change_key(fsname,
3439				    DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
3440				if (err != 0) {
3441					zfs_close(zhp);
3442					goto error;
3443				}
3444			}
3445
3446			stream_keylocation = fnvlist_lookup_string(props,
3447			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
3448
3449			/*
3450			 * Refresh the properties in case the call to
3451			 * lzc_change_key() changed the value.
3452			 */
3453			zfs_refresh_properties(zhp);
3454			err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
3455			    keylocation, sizeof (keylocation), NULL, NULL,
3456			    0, B_TRUE);
3457			if (err != 0) {
3458				zfs_close(zhp);
3459				goto error;
3460			}
3461
3462			if (strcmp(keylocation, stream_keylocation) != 0) {
3463				err = zfs_prop_set(zhp,
3464				    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
3465				    stream_keylocation);
3466				if (err != 0) {
3467					zfs_close(zhp);
3468					goto error;
3469				}
3470			}
3471		}
3472
3473		/*
3474		 * If the dataset is not flagged as an encryption root and is
3475		 * currently an encryption root, force it to inherit from its
3476		 * parent. The root of a raw send should never be
3477		 * force-inherited.
3478		 */
3479		if (!stream_encroot && is_encroot &&
3480		    strcmp(top_zfs, fsname) != 0) {
3481			err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
3482			    NULL, NULL, 0);
3483			if (err != 0) {
3484				zfs_close(zhp);
3485				goto error;
3486			}
3487		}
3488
3489		zfs_close(zhp);
3490	}
3491
3492	return (0);
3493
3494error:
3495	return (err);
3496}
3497
3498static int
3499recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
3500    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3501    nvlist_t *renamed)
3502{
3503	nvlist_t *local_nv, *deleted = NULL;
3504	avl_tree_t *local_avl;
3505	nvpair_t *fselem, *nextfselem;
3506	const char *fromsnap;
3507	char newname[ZFS_MAX_DATASET_NAME_LEN];
3508	char guidname[32];
3509	int error;
3510	boolean_t needagain, progress, recursive;
3511	const char *s1, *s2;
3512
3513	fromsnap = fnvlist_lookup_string(stream_nv, "fromsnap");
3514
3515	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3516	    ENOENT);
3517
3518	if (flags->dryrun)
3519		return (0);
3520
3521again:
3522	needagain = progress = B_FALSE;
3523
3524	deleted = fnvlist_alloc();
3525
3526	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
3527	    recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE, B_FALSE,
3528	    B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0)
3529		return (error);
3530
3531	/*
3532	 * Process deletes and renames
3533	 */
3534	for (fselem = nvlist_next_nvpair(local_nv, NULL);
3535	    fselem; fselem = nextfselem) {
3536		nvlist_t *nvfs, *snaps;
3537		nvlist_t *stream_nvfs = NULL;
3538		nvpair_t *snapelem, *nextsnapelem;
3539		uint64_t fromguid = 0;
3540		uint64_t originguid = 0;
3541		uint64_t stream_originguid = 0;
3542		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
3543		const char *fsname, *stream_fsname;
3544
3545		nextfselem = nvlist_next_nvpair(local_nv, fselem);
3546
3547		nvfs = fnvpair_value_nvlist(fselem);
3548		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
3549		fsname = fnvlist_lookup_string(nvfs, "name");
3550		parent_fromsnap_guid = fnvlist_lookup_uint64(nvfs,
3551		    "parentfromsnap");
3552		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
3553
3554		/*
3555		 * First find the stream's fs, so we can check for
3556		 * a different origin (due to "zfs promote")
3557		 */
3558		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3559		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
3560			uint64_t thisguid;
3561
3562			thisguid = fnvpair_value_uint64(snapelem);
3563			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
3564
3565			if (stream_nvfs != NULL)
3566				break;
3567		}
3568
3569		/* check for promote */
3570		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
3571		    &stream_originguid);
3572		if (stream_nvfs && originguid != stream_originguid) {
3573			switch (created_before(hdl, local_avl,
3574			    stream_originguid, originguid)) {
3575			case 1: {
3576				/* promote it! */
3577				nvlist_t *origin_nvfs;
3578				const char *origin_fsname;
3579
3580				origin_nvfs = fsavl_find(local_avl, originguid,
3581				    NULL);
3582				origin_fsname = fnvlist_lookup_string(
3583				    origin_nvfs, "name");
3584				error = recv_promote(hdl, fsname, origin_fsname,
3585				    flags);
3586				if (error == 0)
3587					progress = B_TRUE;
3588				break;
3589			}
3590			default:
3591				break;
3592			case -1:
3593				fsavl_destroy(local_avl);
3594				fnvlist_free(local_nv);
3595				return (-1);
3596			}
3597			/*
3598			 * We had/have the wrong origin, therefore our
3599			 * list of snapshots is wrong.  Need to handle
3600			 * them on the next pass.
3601			 */
3602			needagain = B_TRUE;
3603			continue;
3604		}
3605
3606		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3607		    snapelem; snapelem = nextsnapelem) {
3608			uint64_t thisguid;
3609			const char *stream_snapname;
3610			nvlist_t *found, *props;
3611
3612			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
3613
3614			thisguid = fnvpair_value_uint64(snapelem);
3615			found = fsavl_find(stream_avl, thisguid,
3616			    &stream_snapname);
3617
3618			/* check for delete */
3619			if (found == NULL) {
3620				char name[ZFS_MAX_DATASET_NAME_LEN];
3621
3622				if (!flags->force)
3623					continue;
3624
3625				(void) snprintf(name, sizeof (name), "%s@%s",
3626				    fsname, nvpair_name(snapelem));
3627
3628				error = recv_destroy(hdl, name,
3629				    strlen(fsname)+1, newname, flags);
3630				if (error)
3631					needagain = B_TRUE;
3632				else
3633					progress = B_TRUE;
3634				sprintf(guidname, "%llu",
3635				    (u_longlong_t)thisguid);
3636				nvlist_add_boolean(deleted, guidname);
3637				continue;
3638			}
3639
3640			stream_nvfs = found;
3641
3642			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
3643			    &props) && 0 == nvlist_lookup_nvlist(props,
3644			    stream_snapname, &props)) {
3645				zfs_cmd_t zc = {"\0"};
3646
3647				zc.zc_cookie = B_TRUE; /* received */
3648				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
3649				    "%s@%s", fsname, nvpair_name(snapelem));
3650				zcmd_write_src_nvlist(hdl, &zc, props);
3651				(void) zfs_ioctl(hdl,
3652				    ZFS_IOC_SET_PROP, &zc);
3653				zcmd_free_nvlists(&zc);
3654			}
3655
3656			/* check for different snapname */
3657			if (strcmp(nvpair_name(snapelem),
3658			    stream_snapname) != 0) {
3659				char name[ZFS_MAX_DATASET_NAME_LEN];
3660				char tryname[ZFS_MAX_DATASET_NAME_LEN];
3661
3662				(void) snprintf(name, sizeof (name), "%s@%s",
3663				    fsname, nvpair_name(snapelem));
3664				(void) snprintf(tryname, sizeof (name), "%s@%s",
3665				    fsname, stream_snapname);
3666
3667				error = recv_rename(hdl, name, tryname,
3668				    strlen(fsname)+1, newname, flags);
3669				if (error)
3670					needagain = B_TRUE;
3671				else
3672					progress = B_TRUE;
3673			}
3674
3675			if (strcmp(stream_snapname, fromsnap) == 0)
3676				fromguid = thisguid;
3677		}
3678
3679		/* check for delete */
3680		if (stream_nvfs == NULL) {
3681			if (!flags->force)
3682				continue;
3683
3684			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
3685			    newname, flags);
3686			if (error)
3687				needagain = B_TRUE;
3688			else
3689				progress = B_TRUE;
3690			sprintf(guidname, "%llu",
3691			    (u_longlong_t)parent_fromsnap_guid);
3692			nvlist_add_boolean(deleted, guidname);
3693			continue;
3694		}
3695
3696		if (fromguid == 0) {
3697			if (flags->verbose) {
3698				(void) printf("local fs %s does not have "
3699				    "fromsnap (%s in stream); must have "
3700				    "been deleted locally; ignoring\n",
3701				    fsname, fromsnap);
3702			}
3703			continue;
3704		}
3705
3706		stream_fsname = fnvlist_lookup_string(stream_nvfs, "name");
3707		stream_parent_fromsnap_guid = fnvlist_lookup_uint64(
3708		    stream_nvfs, "parentfromsnap");
3709
3710		s1 = strrchr(fsname, '/');
3711		s2 = strrchr(stream_fsname, '/');
3712
3713		/*
3714		 * Check if we're going to rename based on parent guid change
3715		 * and the current parent guid was also deleted. If it was then
3716		 * rename will fail and is likely unneeded, so avoid this and
3717		 * force an early retry to determine the new
3718		 * parent_fromsnap_guid.
3719		 */
3720		if (stream_parent_fromsnap_guid != 0 &&
3721		    parent_fromsnap_guid != 0 &&
3722		    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
3723			sprintf(guidname, "%llu",
3724			    (u_longlong_t)parent_fromsnap_guid);
3725			if (nvlist_exists(deleted, guidname)) {
3726				progress = B_TRUE;
3727				needagain = B_TRUE;
3728				goto doagain;
3729			}
3730		}
3731
3732		/*
3733		 * Check for rename. If the exact receive path is specified, it
3734		 * does not count as a rename, but we still need to check the
3735		 * datasets beneath it.
3736		 */
3737		if ((stream_parent_fromsnap_guid != 0 &&
3738		    parent_fromsnap_guid != 0 &&
3739		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
3740		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
3741		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
3742			nvlist_t *parent;
3743			char tryname[ZFS_MAX_DATASET_NAME_LEN];
3744
3745			parent = fsavl_find(local_avl,
3746			    stream_parent_fromsnap_guid, NULL);
3747			/*
3748			 * NB: parent might not be found if we used the
3749			 * tosnap for stream_parent_fromsnap_guid,
3750			 * because the parent is a newly-created fs;
3751			 * we'll be able to rename it after we recv the
3752			 * new fs.
3753			 */
3754			if (parent != NULL) {
3755				const char *pname;
3756
3757				pname = fnvlist_lookup_string(parent, "name");
3758				(void) snprintf(tryname, sizeof (tryname),
3759				    "%s%s", pname, strrchr(stream_fsname, '/'));
3760			} else {
3761				tryname[0] = '\0';
3762				if (flags->verbose) {
3763					(void) printf("local fs %s new parent "
3764					    "not found\n", fsname);
3765				}
3766			}
3767
3768			newname[0] = '\0';
3769
3770			error = recv_rename(hdl, fsname, tryname,
3771			    strlen(tofs)+1, newname, flags);
3772
3773			if (renamed != NULL && newname[0] != '\0') {
3774				fnvlist_add_boolean(renamed, newname);
3775			}
3776
3777			if (error)
3778				needagain = B_TRUE;
3779			else
3780				progress = B_TRUE;
3781		}
3782	}
3783
3784doagain:
3785	fsavl_destroy(local_avl);
3786	fnvlist_free(local_nv);
3787	fnvlist_free(deleted);
3788
3789	if (needagain && progress) {
3790		/* do another pass to fix up temporary names */
3791		if (flags->verbose)
3792			(void) printf("another pass:\n");
3793		goto again;
3794	}
3795
3796	return (needagain || error != 0);
3797}
3798
3799static int
3800zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
3801    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
3802    char **top_zfs, nvlist_t *cmdprops)
3803{
3804	nvlist_t *stream_nv = NULL;
3805	avl_tree_t *stream_avl = NULL;
3806	const char *fromsnap = NULL;
3807	const char *sendsnap = NULL;
3808	char *cp;
3809	char tofs[ZFS_MAX_DATASET_NAME_LEN];
3810	char sendfs[ZFS_MAX_DATASET_NAME_LEN];
3811	char errbuf[ERRBUFLEN];
3812	dmu_replay_record_t drre;
3813	int error;
3814	boolean_t anyerr = B_FALSE;
3815	boolean_t softerr = B_FALSE;
3816	boolean_t recursive, raw;
3817
3818	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3819	    "cannot receive"));
3820
3821	assert(drr->drr_type == DRR_BEGIN);
3822	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
3823	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
3824	    DMU_COMPOUNDSTREAM);
3825
3826	/*
3827	 * Read in the nvlist from the stream.
3828	 */
3829	if (drr->drr_payloadlen != 0) {
3830		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
3831		    &stream_nv, flags->byteswap, zc);
3832		if (error) {
3833			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3834			goto out;
3835		}
3836	}
3837
3838	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3839	    ENOENT);
3840	raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
3841
3842	if (recursive && strchr(destname, '@')) {
3843		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3844		    "cannot specify snapshot name for multi-snapshot stream"));
3845		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3846		goto out;
3847	}
3848
3849	/*
3850	 * Read in the end record and verify checksum.
3851	 */
3852	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
3853	    flags->byteswap, NULL)))
3854		goto out;
3855	if (flags->byteswap) {
3856		drre.drr_type = BSWAP_32(drre.drr_type);
3857		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
3858		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
3859		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
3860		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
3861		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
3862		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
3863		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
3864		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
3865	}
3866	if (drre.drr_type != DRR_END) {
3867		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3868		goto out;
3869	}
3870	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
3871		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3872		    "incorrect header checksum"));
3873		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3874		goto out;
3875	}
3876
3877	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
3878
3879	if (drr->drr_payloadlen != 0) {
3880		nvlist_t *stream_fss;
3881
3882		stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3883		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
3884			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3885			    "couldn't allocate avl tree"));
3886			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
3887			goto out;
3888		}
3889
3890		if (fromsnap != NULL && recursive) {
3891			nvlist_t *renamed = NULL;
3892			nvpair_t *pair = NULL;
3893
3894			(void) strlcpy(tofs, destname, sizeof (tofs));
3895			if (flags->isprefix) {
3896				struct drr_begin *drrb = &drr->drr_u.drr_begin;
3897				int i;
3898
3899				if (flags->istail) {
3900					cp = strrchr(drrb->drr_toname, '/');
3901					if (cp == NULL) {
3902						(void) strlcat(tofs, "/",
3903						    sizeof (tofs));
3904						i = 0;
3905					} else {
3906						i = (cp - drrb->drr_toname);
3907					}
3908				} else {
3909					i = strcspn(drrb->drr_toname, "/@");
3910				}
3911				/* zfs_receive_one() will create_parents() */
3912				(void) strlcat(tofs, &drrb->drr_toname[i],
3913				    sizeof (tofs));
3914				*strchr(tofs, '@') = '\0';
3915			}
3916
3917			if (!flags->dryrun && !flags->nomount) {
3918				renamed = fnvlist_alloc();
3919			}
3920
3921			softerr = recv_incremental_replication(hdl, tofs, flags,
3922			    stream_nv, stream_avl, renamed);
3923
3924			/* Unmount renamed filesystems before receiving. */
3925			while ((pair = nvlist_next_nvpair(renamed,
3926			    pair)) != NULL) {
3927				zfs_handle_t *zhp;
3928				prop_changelist_t *clp = NULL;
3929
3930				zhp = zfs_open(hdl, nvpair_name(pair),
3931				    ZFS_TYPE_FILESYSTEM);
3932				if (zhp != NULL) {
3933					clp = changelist_gather(zhp,
3934					    ZFS_PROP_MOUNTPOINT, 0,
3935					    flags->forceunmount ? MS_FORCE : 0);
3936					zfs_close(zhp);
3937					if (clp != NULL) {
3938						softerr |=
3939						    changelist_prefix(clp);
3940						changelist_free(clp);
3941					}
3942				}
3943			}
3944
3945			fnvlist_free(renamed);
3946		}
3947	}
3948
3949	/*
3950	 * Get the fs specified by the first path in the stream (the top level
3951	 * specified by 'zfs send') and pass it to each invocation of
3952	 * zfs_receive_one().
3953	 */
3954	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
3955	    sizeof (sendfs));
3956	if ((cp = strchr(sendfs, '@')) != NULL) {
3957		*cp = '\0';
3958		/*
3959		 * Find the "sendsnap", the final snapshot in a replication
3960		 * stream.  zfs_receive_one() handles certain errors
3961		 * differently, depending on if the contained stream is the
3962		 * last one or not.
3963		 */
3964		sendsnap = (cp + 1);
3965	}
3966
3967	/* Finally, receive each contained stream */
3968	do {
3969		/*
3970		 * we should figure out if it has a recoverable
3971		 * error, in which case do a recv_skip() and drive on.
3972		 * Note, if we fail due to already having this guid,
3973		 * zfs_receive_one() will take care of it (ie,
3974		 * recv_skip() and return 0).
3975		 */
3976		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
3977		    sendfs, stream_nv, stream_avl, top_zfs, sendsnap, cmdprops);
3978		if (error == ENODATA) {
3979			error = 0;
3980			break;
3981		}
3982		anyerr |= error;
3983	} while (error == 0);
3984
3985	if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
3986		/*
3987		 * Now that we have the fs's they sent us, try the
3988		 * renames again.
3989		 */
3990		softerr = recv_incremental_replication(hdl, tofs, flags,
3991		    stream_nv, stream_avl, NULL);
3992	}
3993
3994	if (raw && softerr == 0 && *top_zfs != NULL) {
3995		softerr = recv_fix_encryption_hierarchy(hdl, *top_zfs,
3996		    stream_nv);
3997	}
3998
3999out:
4000	fsavl_destroy(stream_avl);
4001	fnvlist_free(stream_nv);
4002	if (softerr)
4003		error = -2;
4004	if (anyerr)
4005		error = -1;
4006	return (error);
4007}
4008
4009static void
4010trunc_prop_errs(int truncated)
4011{
4012	ASSERT(truncated != 0);
4013
4014	if (truncated == 1)
4015		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4016		    "1 more property could not be set\n"));
4017	else
4018		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4019		    "%d more properties could not be set\n"), truncated);
4020}
4021
4022static int
4023recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
4024{
4025	dmu_replay_record_t *drr;
4026	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
4027	uint64_t payload_size;
4028	char errbuf[ERRBUFLEN];
4029
4030	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4031	    "cannot receive"));
4032
4033	/* XXX would be great to use lseek if possible... */
4034	drr = buf;
4035
4036	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
4037	    byteswap, NULL) == 0) {
4038		if (byteswap)
4039			drr->drr_type = BSWAP_32(drr->drr_type);
4040
4041		switch (drr->drr_type) {
4042		case DRR_BEGIN:
4043			if (drr->drr_payloadlen != 0) {
4044				(void) recv_read(hdl, fd, buf,
4045				    drr->drr_payloadlen, B_FALSE, NULL);
4046			}
4047			break;
4048
4049		case DRR_END:
4050			free(buf);
4051			return (0);
4052
4053		case DRR_OBJECT:
4054			if (byteswap) {
4055				drr->drr_u.drr_object.drr_bonuslen =
4056				    BSWAP_32(drr->drr_u.drr_object.
4057				    drr_bonuslen);
4058				drr->drr_u.drr_object.drr_raw_bonuslen =
4059				    BSWAP_32(drr->drr_u.drr_object.
4060				    drr_raw_bonuslen);
4061			}
4062
4063			payload_size =
4064			    DRR_OBJECT_PAYLOAD_SIZE(&drr->drr_u.drr_object);
4065			(void) recv_read(hdl, fd, buf, payload_size,
4066			    B_FALSE, NULL);
4067			break;
4068
4069		case DRR_WRITE:
4070			if (byteswap) {
4071				drr->drr_u.drr_write.drr_logical_size =
4072				    BSWAP_64(
4073				    drr->drr_u.drr_write.drr_logical_size);
4074				drr->drr_u.drr_write.drr_compressed_size =
4075				    BSWAP_64(
4076				    drr->drr_u.drr_write.drr_compressed_size);
4077			}
4078			payload_size =
4079			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
4080			assert(payload_size <= SPA_MAXBLOCKSIZE);
4081			(void) recv_read(hdl, fd, buf,
4082			    payload_size, B_FALSE, NULL);
4083			break;
4084		case DRR_SPILL:
4085			if (byteswap) {
4086				drr->drr_u.drr_spill.drr_length =
4087				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
4088				drr->drr_u.drr_spill.drr_compressed_size =
4089				    BSWAP_64(drr->drr_u.drr_spill.
4090				    drr_compressed_size);
4091			}
4092
4093			payload_size =
4094			    DRR_SPILL_PAYLOAD_SIZE(&drr->drr_u.drr_spill);
4095			(void) recv_read(hdl, fd, buf, payload_size,
4096			    B_FALSE, NULL);
4097			break;
4098		case DRR_WRITE_EMBEDDED:
4099			if (byteswap) {
4100				drr->drr_u.drr_write_embedded.drr_psize =
4101				    BSWAP_32(drr->drr_u.drr_write_embedded.
4102				    drr_psize);
4103			}
4104			(void) recv_read(hdl, fd, buf,
4105			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
4106			    8), B_FALSE, NULL);
4107			break;
4108		case DRR_OBJECT_RANGE:
4109		case DRR_WRITE_BYREF:
4110		case DRR_FREEOBJECTS:
4111		case DRR_FREE:
4112			break;
4113
4114		default:
4115			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4116			    "invalid record type"));
4117			free(buf);
4118			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4119		}
4120	}
4121
4122	free(buf);
4123	return (-1);
4124}
4125
4126static void
4127recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
4128    boolean_t resumable, boolean_t checksum)
4129{
4130	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
4131
4132	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, (checksum ?
4133	    "checksum mismatch" : "incomplete stream")));
4134
4135	if (!resumable)
4136		return;
4137	(void) strlcpy(target_fs, target_snap, sizeof (target_fs));
4138	*strchr(target_fs, '@') = '\0';
4139	zfs_handle_t *zhp = zfs_open(hdl, target_fs,
4140	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
4141	if (zhp == NULL)
4142		return;
4143
4144	char token_buf[ZFS_MAXPROPLEN];
4145	int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
4146	    token_buf, sizeof (token_buf),
4147	    NULL, NULL, 0, B_TRUE);
4148	if (error == 0) {
4149		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4150		    "checksum mismatch or incomplete stream.\n"
4151		    "Partially received snapshot is saved.\n"
4152		    "A resuming stream can be generated on the sending "
4153		    "system by running:\n"
4154		    "    zfs send -t %s"),
4155		    token_buf);
4156	}
4157	zfs_close(zhp);
4158}
4159
4160/*
4161 * Prepare a new nvlist of properties that are to override (-o) or be excluded
4162 * (-x) from the received dataset
4163 * recvprops: received properties from the send stream
4164 * cmdprops: raw input properties from command line
4165 * origprops: properties, both locally-set and received, currently set on the
4166 *            target dataset if it exists, NULL otherwise.
4167 * oxprops: valid output override (-o) and excluded (-x) properties
4168 */
4169static int
4170zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type,
4171    char *fsname, boolean_t zoned, boolean_t recursive, boolean_t newfs,
4172    boolean_t raw, boolean_t toplevel, nvlist_t *recvprops, nvlist_t *cmdprops,
4173    nvlist_t *origprops, nvlist_t **oxprops, uint8_t **wkeydata_out,
4174    uint_t *wkeylen_out, const char *errbuf)
4175{
4176	nvpair_t *nvp;
4177	nvlist_t *oprops, *voprops;
4178	zfs_handle_t *zhp = NULL;
4179	zpool_handle_t *zpool_hdl = NULL;
4180	char *cp;
4181	int ret = 0;
4182	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
4183
4184	if (nvlist_empty(cmdprops))
4185		return (0); /* No properties to override or exclude */
4186
4187	*oxprops = fnvlist_alloc();
4188	oprops = fnvlist_alloc();
4189
4190	strlcpy(namebuf, fsname, ZFS_MAX_DATASET_NAME_LEN);
4191
4192	/*
4193	 * Get our dataset handle. The target dataset may not exist yet.
4194	 */
4195	if (zfs_dataset_exists(hdl, namebuf, ZFS_TYPE_DATASET)) {
4196		zhp = zfs_open(hdl, namebuf, ZFS_TYPE_DATASET);
4197		if (zhp == NULL) {
4198			ret = -1;
4199			goto error;
4200		}
4201	}
4202
4203	/* open the zpool handle */
4204	cp = strchr(namebuf, '/');
4205	if (cp != NULL)
4206		*cp = '\0';
4207	zpool_hdl = zpool_open(hdl, namebuf);
4208	if (zpool_hdl == NULL) {
4209		ret = -1;
4210		goto error;
4211	}
4212
4213	/* restore namebuf to match fsname for later use */
4214	if (cp != NULL)
4215		*cp = '/';
4216
4217	/*
4218	 * first iteration: process excluded (-x) properties now and gather
4219	 * added (-o) properties to be later processed by zfs_valid_proplist()
4220	 */
4221	nvp = NULL;
4222	while ((nvp = nvlist_next_nvpair(cmdprops, nvp)) != NULL) {
4223		const char *name = nvpair_name(nvp);
4224		zfs_prop_t prop = zfs_name_to_prop(name);
4225
4226		/*
4227		 * It turns out, if we don't normalize "aliased" names
4228		 * e.g. compress= against the "real" names (e.g. compression)
4229		 * here, then setting/excluding them does not work as
4230		 * intended.
4231		 *
4232		 * But since user-defined properties wouldn't have a valid
4233		 * mapping here, we do this conditional dance.
4234		 */
4235		const char *newname = name;
4236		if (prop >= ZFS_PROP_TYPE)
4237			newname = zfs_prop_to_name(prop);
4238
4239		/* "origin" is processed separately, don't handle it here */
4240		if (prop == ZFS_PROP_ORIGIN)
4241			continue;
4242
4243		/* raw streams can't override encryption properties */
4244		if ((zfs_prop_encryption_key_param(prop) ||
4245		    prop == ZFS_PROP_ENCRYPTION) && raw) {
4246			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4247			    "encryption property '%s' cannot "
4248			    "be set or excluded for raw streams."), name);
4249			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4250			goto error;
4251		}
4252
4253		/*
4254		 * For plain replicated send, we can ignore encryption
4255		 * properties other than first stream
4256		 */
4257		if ((zfs_prop_encryption_key_param(prop) || prop ==
4258		    ZFS_PROP_ENCRYPTION) && !newfs && recursive && !raw) {
4259			continue;
4260		}
4261
4262		/* incremental streams can only exclude encryption properties */
4263		if ((zfs_prop_encryption_key_param(prop) ||
4264		    prop == ZFS_PROP_ENCRYPTION) && !newfs &&
4265		    nvpair_type(nvp) != DATA_TYPE_BOOLEAN) {
4266			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4267			    "encryption property '%s' cannot "
4268			    "be set for incremental streams."), name);
4269			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4270			goto error;
4271		}
4272
4273		switch (nvpair_type(nvp)) {
4274		case DATA_TYPE_BOOLEAN: /* -x property */
4275			/*
4276			 * DATA_TYPE_BOOLEAN is the way we're asked to "exclude"
4277			 * a property: this is done by forcing an explicit
4278			 * inherit on the destination so the effective value is
4279			 * not the one we received from the send stream.
4280			 */
4281			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4282			    !zfs_prop_user(name)) {
4283				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4284				    "Warning: %s: property '%s' does not "
4285				    "apply to datasets of this type\n"),
4286				    fsname, name);
4287				continue;
4288			}
4289			/*
4290			 * We do this only if the property is not already
4291			 * locally-set, in which case its value will take
4292			 * priority over the received anyway.
4293			 */
4294			if (nvlist_exists(origprops, newname)) {
4295				nvlist_t *attrs;
4296				const char *source = NULL;
4297
4298				attrs = fnvlist_lookup_nvlist(origprops,
4299				    newname);
4300				if (nvlist_lookup_string(attrs,
4301				    ZPROP_SOURCE, &source) == 0 &&
4302				    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
4303					continue;
4304			}
4305			/*
4306			 * We can't force an explicit inherit on non-inheritable
4307			 * properties: if we're asked to exclude this kind of
4308			 * values we remove them from "recvprops" input nvlist.
4309			 */
4310			if (!zfs_prop_user(name) && /* can be inherited too */
4311			    !zfs_prop_inheritable(prop) &&
4312			    nvlist_exists(recvprops, newname))
4313				fnvlist_remove(recvprops, newname);
4314			else
4315				fnvlist_add_boolean(*oxprops, newname);
4316			break;
4317		case DATA_TYPE_STRING: /* -o property=value */
4318			/*
4319			 * we're trying to override a property that does not
4320			 * make sense for this type of dataset, but we don't
4321			 * want to fail if the receive is recursive: this comes
4322			 * in handy when the send stream contains, for
4323			 * instance, a child ZVOL and we're trying to receive
4324			 * it with "-o atime=on"
4325			 */
4326			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4327			    !zfs_prop_user(name)) {
4328				if (recursive)
4329					continue;
4330				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4331				    "property '%s' does not apply to datasets "
4332				    "of this type"), name);
4333				ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4334				goto error;
4335			}
4336			fnvlist_add_string(oprops, newname,
4337			    fnvpair_value_string(nvp));
4338			break;
4339		default:
4340			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4341			    "property '%s' must be a string or boolean"), name);
4342			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4343			goto error;
4344		}
4345	}
4346
4347	if (toplevel) {
4348		/* convert override strings properties to native */
4349		if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
4350		    oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
4351			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4352			goto error;
4353		}
4354
4355		/*
4356		 * zfs_crypto_create() requires the parent name. Get it
4357		 * by truncating the fsname copy stored in namebuf.
4358		 */
4359		cp = strrchr(namebuf, '/');
4360		if (cp != NULL)
4361			*cp = '\0';
4362
4363		if (!raw && !(!newfs && recursive) &&
4364		    zfs_crypto_create(hdl, namebuf, voprops, NULL,
4365		    B_FALSE, wkeydata_out, wkeylen_out) != 0) {
4366			fnvlist_free(voprops);
4367			ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4368			goto error;
4369		}
4370
4371		/* second pass: process "-o" properties */
4372		fnvlist_merge(*oxprops, voprops);
4373		fnvlist_free(voprops);
4374	} else {
4375		/* override props on child dataset are inherited */
4376		nvp = NULL;
4377		while ((nvp = nvlist_next_nvpair(oprops, nvp)) != NULL) {
4378			const char *name = nvpair_name(nvp);
4379			fnvlist_add_boolean(*oxprops, name);
4380		}
4381	}
4382
4383error:
4384	if (zhp != NULL)
4385		zfs_close(zhp);
4386	if (zpool_hdl != NULL)
4387		zpool_close(zpool_hdl);
4388	fnvlist_free(oprops);
4389	return (ret);
4390}
4391
4392/*
4393 * Restores a backup of tosnap from the file descriptor specified by infd.
4394 */
4395static int
4396zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
4397    const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
4398    dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
4399    avl_tree_t *stream_avl, char **top_zfs,
4400    const char *finalsnap, nvlist_t *cmdprops)
4401{
4402	struct timespec begin_time;
4403	int ioctl_err, ioctl_errno, err;
4404	char *cp;
4405	struct drr_begin *drrb = &drr->drr_u.drr_begin;
4406	char errbuf[ERRBUFLEN];
4407	const char *chopprefix;
4408	boolean_t newfs = B_FALSE;
4409	boolean_t stream_wantsnewfs, stream_resumingnewfs;
4410	boolean_t newprops = B_FALSE;
4411	uint64_t read_bytes = 0;
4412	uint64_t errflags = 0;
4413	uint64_t parent_snapguid = 0;
4414	prop_changelist_t *clp = NULL;
4415	nvlist_t *snapprops_nvlist = NULL;
4416	nvlist_t *snapholds_nvlist = NULL;
4417	zprop_errflags_t prop_errflags;
4418	nvlist_t *prop_errors = NULL;
4419	boolean_t recursive;
4420	const char *snapname = NULL;
4421	char destsnap[MAXPATHLEN * 2];
4422	char origin[MAXNAMELEN] = {0};
4423	char name[MAXPATHLEN];
4424	char tmp_keylocation[MAXNAMELEN] = {0};
4425	nvlist_t *rcvprops = NULL; /* props received from the send stream */
4426	nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
4427	nvlist_t *origprops = NULL; /* original props (if destination exists) */
4428	zfs_type_t type = ZFS_TYPE_INVALID;
4429	boolean_t toplevel = B_FALSE;
4430	boolean_t zoned = B_FALSE;
4431	boolean_t hastoken = B_FALSE;
4432	boolean_t redacted;
4433	uint8_t *wkeydata = NULL;
4434	uint_t wkeylen = 0;
4435
4436#ifndef CLOCK_MONOTONIC_RAW
4437#define	CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
4438#endif
4439	clock_gettime(CLOCK_MONOTONIC_RAW, &begin_time);
4440
4441	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4442	    "cannot receive"));
4443
4444	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
4445	    ENOENT);
4446
4447	/* Did the user request holds be skipped via zfs recv -k? */
4448	boolean_t holds = flags->holds && !flags->skipholds;
4449
4450	if (stream_avl != NULL) {
4451		const char *keylocation = NULL;
4452		nvlist_t *lookup = NULL;
4453		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
4454		    &snapname);
4455
4456		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
4457		    &parent_snapguid);
4458		err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
4459		if (err) {
4460			rcvprops = fnvlist_alloc();
4461			newprops = B_TRUE;
4462		}
4463
4464		/*
4465		 * The keylocation property may only be set on encryption roots,
4466		 * but this dataset might not become an encryption root until
4467		 * recv_fix_encryption_hierarchy() is called. That function
4468		 * will fixup the keylocation anyway, so we temporarily unset
4469		 * the keylocation for now to avoid any errors from the receive
4470		 * ioctl.
4471		 */
4472		err = nvlist_lookup_string(rcvprops,
4473		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
4474		if (err == 0) {
4475			strlcpy(tmp_keylocation, keylocation, MAXNAMELEN);
4476			(void) nvlist_remove_all(rcvprops,
4477			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
4478		}
4479
4480		if (flags->canmountoff) {
4481			fnvlist_add_uint64(rcvprops,
4482			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0);
4483		} else if (newprops) {	/* nothing in rcvprops, eliminate it */
4484			fnvlist_free(rcvprops);
4485			rcvprops = NULL;
4486			newprops = B_FALSE;
4487		}
4488		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
4489			snapprops_nvlist = fnvlist_lookup_nvlist(lookup,
4490			    snapname);
4491		}
4492		if (holds) {
4493			if (0 == nvlist_lookup_nvlist(fs, "snapholds",
4494			    &lookup)) {
4495				snapholds_nvlist = fnvlist_lookup_nvlist(
4496				    lookup, snapname);
4497			}
4498		}
4499	}
4500
4501	cp = NULL;
4502
4503	/*
4504	 * Determine how much of the snapshot name stored in the stream
4505	 * we are going to tack on to the name they specified on the
4506	 * command line, and how much we are going to chop off.
4507	 *
4508	 * If they specified a snapshot, chop the entire name stored in
4509	 * the stream.
4510	 */
4511	if (flags->istail) {
4512		/*
4513		 * A filesystem was specified with -e. We want to tack on only
4514		 * the tail of the sent snapshot path.
4515		 */
4516		if (strchr(tosnap, '@')) {
4517			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4518			    "argument - snapshot not allowed with -e"));
4519			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4520			goto out;
4521		}
4522
4523		chopprefix = strrchr(sendfs, '/');
4524
4525		if (chopprefix == NULL) {
4526			/*
4527			 * The tail is the poolname, so we need to
4528			 * prepend a path separator.
4529			 */
4530			int len = strlen(drrb->drr_toname);
4531			cp = umem_alloc(len + 2, UMEM_NOFAIL);
4532			cp[0] = '/';
4533			(void) strcpy(&cp[1], drrb->drr_toname);
4534			chopprefix = cp;
4535		} else {
4536			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
4537		}
4538	} else if (flags->isprefix) {
4539		/*
4540		 * A filesystem was specified with -d. We want to tack on
4541		 * everything but the first element of the sent snapshot path
4542		 * (all but the pool name).
4543		 */
4544		if (strchr(tosnap, '@')) {
4545			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4546			    "argument - snapshot not allowed with -d"));
4547			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4548			goto out;
4549		}
4550
4551		chopprefix = strchr(drrb->drr_toname, '/');
4552		if (chopprefix == NULL)
4553			chopprefix = strchr(drrb->drr_toname, '@');
4554	} else if (strchr(tosnap, '@') == NULL) {
4555		/*
4556		 * If a filesystem was specified without -d or -e, we want to
4557		 * tack on everything after the fs specified by 'zfs send'.
4558		 */
4559		chopprefix = drrb->drr_toname + strlen(sendfs);
4560	} else {
4561		/* A snapshot was specified as an exact path (no -d or -e). */
4562		if (recursive) {
4563			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4564			    "cannot specify snapshot name for multi-snapshot "
4565			    "stream"));
4566			err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4567			goto out;
4568		}
4569		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
4570	}
4571
4572	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
4573	ASSERT(chopprefix > drrb->drr_toname || strchr(sendfs, '/') == NULL);
4574	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname) ||
4575	    strchr(sendfs, '/') == NULL);
4576	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
4577	    chopprefix[0] == '\0');
4578
4579	/*
4580	 * Determine name of destination snapshot.
4581	 */
4582	(void) strlcpy(destsnap, tosnap, sizeof (destsnap));
4583	(void) strlcat(destsnap, chopprefix, sizeof (destsnap));
4584	if (cp != NULL)
4585		umem_free(cp, strlen(cp) + 1);
4586	if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
4587		err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4588		goto out;
4589	}
4590
4591	/*
4592	 * Determine the name of the origin snapshot.
4593	 */
4594	if (originsnap) {
4595		(void) strlcpy(origin, originsnap, sizeof (origin));
4596		if (flags->verbose)
4597			(void) printf("using provided clone origin %s\n",
4598			    origin);
4599	} else if (drrb->drr_flags & DRR_FLAG_CLONE) {
4600		if (guid_to_name(hdl, destsnap,
4601		    drrb->drr_fromguid, B_FALSE, origin) != 0) {
4602			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4603			    "local origin for clone %s does not exist"),
4604			    destsnap);
4605			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4606			goto out;
4607		}
4608		if (flags->verbose)
4609			(void) printf("found clone origin %s\n", origin);
4610	}
4611
4612	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4613	    DMU_BACKUP_FEATURE_DEDUP)) {
4614		(void) fprintf(stderr,
4615		    gettext("ERROR: \"zfs receive\" no longer supports "
4616		    "deduplicated send streams.  Use\n"
4617		    "the \"zstream redup\" command to convert this stream "
4618		    "to a regular,\n"
4619		    "non-deduplicated stream.\n"));
4620		err = zfs_error(hdl, EZFS_NOTSUP, errbuf);
4621		goto out;
4622	}
4623
4624	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4625	    DMU_BACKUP_FEATURE_RESUMING;
4626	boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4627	    DMU_BACKUP_FEATURE_RAW;
4628	boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4629	    DMU_BACKUP_FEATURE_EMBED_DATA;
4630	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
4631	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
4632	stream_resumingnewfs = (drrb->drr_fromguid == 0 ||
4633	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && resuming;
4634
4635	if (stream_wantsnewfs) {
4636		/*
4637		 * if the parent fs does not exist, look for it based on
4638		 * the parent snap GUID
4639		 */
4640		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4641		    "cannot receive new filesystem stream"));
4642
4643		(void) strlcpy(name, destsnap, sizeof (name));
4644		cp = strrchr(name, '/');
4645		if (cp)
4646			*cp = '\0';
4647		if (cp &&
4648		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4649			char suffix[ZFS_MAX_DATASET_NAME_LEN];
4650			(void) strlcpy(suffix, strrchr(destsnap, '/'),
4651			    sizeof (suffix));
4652			if (guid_to_name(hdl, name, parent_snapguid,
4653			    B_FALSE, destsnap) == 0) {
4654				*strchr(destsnap, '@') = '\0';
4655				(void) strlcat(destsnap, suffix,
4656				    sizeof (destsnap));
4657			}
4658		}
4659	} else {
4660		/*
4661		 * If the fs does not exist, look for it based on the
4662		 * fromsnap GUID.
4663		 */
4664		if (resuming) {
4665			(void) snprintf(errbuf, sizeof (errbuf),
4666			    dgettext(TEXT_DOMAIN,
4667			    "cannot receive resume stream"));
4668		} else {
4669			(void) snprintf(errbuf, sizeof (errbuf),
4670			    dgettext(TEXT_DOMAIN,
4671			    "cannot receive incremental stream"));
4672		}
4673
4674		(void) strlcpy(name, destsnap, sizeof (name));
4675		*strchr(name, '@') = '\0';
4676
4677		/*
4678		 * If the exact receive path was specified and this is the
4679		 * topmost path in the stream, then if the fs does not exist we
4680		 * should look no further.
4681		 */
4682		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
4683		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
4684		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4685			char snap[ZFS_MAX_DATASET_NAME_LEN];
4686			(void) strlcpy(snap, strchr(destsnap, '@'),
4687			    sizeof (snap));
4688			if (guid_to_name(hdl, name, drrb->drr_fromguid,
4689			    B_FALSE, destsnap) == 0) {
4690				*strchr(destsnap, '@') = '\0';
4691				(void) strlcat(destsnap, snap,
4692				    sizeof (destsnap));
4693			}
4694		}
4695	}
4696
4697	(void) strlcpy(name, destsnap, sizeof (name));
4698	*strchr(name, '@') = '\0';
4699
4700	redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4701	    DMU_BACKUP_FEATURE_REDACTED;
4702
4703	if (flags->heal) {
4704		if (flags->isprefix || flags->istail || flags->force ||
4705		    flags->canmountoff || flags->resumable || flags->nomount ||
4706		    flags->skipholds) {
4707			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4708			    "corrective recv can not be used when combined with"
4709			    " this flag"));
4710			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4711			goto out;
4712		}
4713		uint64_t guid =
4714		    get_snap_guid(hdl, name, strchr(destsnap, '@') + 1);
4715		if (guid == 0) {
4716			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4717			    "corrective recv must specify an existing snapshot"
4718			    " to heal"));
4719			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4720			goto out;
4721		} else if (guid != drrb->drr_toguid) {
4722			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4723			    "local snapshot doesn't match the snapshot"
4724			    " in the provided stream"));
4725			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4726			goto out;
4727		}
4728	} else if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4729		zfs_cmd_t zc = {"\0"};
4730		zfs_handle_t *zhp = NULL;
4731		boolean_t encrypted;
4732
4733		(void) strcpy(zc.zc_name, name);
4734
4735		/*
4736		 * Destination fs exists.  It must be one of these cases:
4737		 *  - an incremental send stream
4738		 *  - the stream specifies a new fs (full stream or clone)
4739		 *    and they want us to blow away the existing fs (and
4740		 *    have therefore specified -F and removed any snapshots)
4741		 *  - we are resuming a failed receive.
4742		 */
4743		if (stream_wantsnewfs) {
4744			boolean_t is_volume = drrb->drr_type == DMU_OST_ZVOL;
4745			if (!flags->force) {
4746				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4747				    "destination '%s' exists\n"
4748				    "must specify -F to overwrite it"), name);
4749				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4750				goto out;
4751			}
4752			if (zfs_ioctl(hdl, ZFS_IOC_SNAPSHOT_LIST_NEXT,
4753			    &zc) == 0) {
4754				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4755				    "destination has snapshots (eg. %s)\n"
4756				    "must destroy them to overwrite it"),
4757				    zc.zc_name);
4758				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4759				goto out;
4760			}
4761			if (is_volume && strrchr(name, '/') == NULL) {
4762				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4763				    "destination %s is the root dataset\n"
4764				    "cannot overwrite with a ZVOL"),
4765				    name);
4766				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4767				goto out;
4768			}
4769			if (is_volume &&
4770			    zfs_ioctl(hdl, ZFS_IOC_DATASET_LIST_NEXT,
4771			    &zc) == 0) {
4772				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4773				    "destination has children (eg. %s)\n"
4774				    "cannot overwrite with a ZVOL"),
4775				    zc.zc_name);
4776				err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4777				goto out;
4778			}
4779		}
4780
4781		if ((zhp = zfs_open(hdl, name,
4782		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
4783			err = -1;
4784			goto out;
4785		}
4786
4787		/*
4788		 * When receiving full/newfs on existing dataset, then it
4789		 * should be done with "-F" flag. Its enforced for initial
4790		 * receive in previous checks in this function.
4791		 * Similarly, on resuming full/newfs recv on existing dataset,
4792		 * it should be done with "-F" flag.
4793		 *
4794		 * When dataset doesn't exist, then full/newfs recv is done on
4795		 * newly created dataset and it's marked INCONSISTENT. But
4796		 * When receiving on existing dataset, recv is first done on
4797		 * %recv and its marked INCONSISTENT. Existing dataset is not
4798		 * marked INCONSISTENT.
4799		 * Resume of full/newfs receive with dataset not INCONSISTENT
4800		 * indicates that its resuming newfs on existing dataset. So,
4801		 * enforce "-F" flag in this case.
4802		 */
4803		if (stream_resumingnewfs &&
4804		    !zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
4805		    !flags->force) {
4806			zfs_close(zhp);
4807			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4808			    "Resuming recv on existing destination '%s'\n"
4809			    "must specify -F to overwrite it"), name);
4810			err = zfs_error(hdl, EZFS_RESUME_EXISTS, errbuf);
4811			goto out;
4812		}
4813
4814		if (stream_wantsnewfs &&
4815		    zhp->zfs_dmustats.dds_origin[0]) {
4816			zfs_close(zhp);
4817			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4818			    "destination '%s' is a clone\n"
4819			    "must destroy it to overwrite it"), name);
4820			err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4821			goto out;
4822		}
4823
4824		/*
4825		 * Raw sends can not be performed as an incremental on top
4826		 * of existing unencrypted datasets. zfs recv -F can't be
4827		 * used to blow away an existing encrypted filesystem. This
4828		 * is because it would require the dsl dir to point to the
4829		 * new key (or lack of a key) and the old key at the same
4830		 * time. The -F flag may still be used for deleting
4831		 * intermediate snapshots that would otherwise prevent the
4832		 * receive from working.
4833		 */
4834		encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
4835		    ZIO_CRYPT_OFF;
4836		if (!stream_wantsnewfs && !encrypted && raw) {
4837			zfs_close(zhp);
4838			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4839			    "cannot perform raw receive on top of "
4840			    "existing unencrypted dataset"));
4841			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4842			goto out;
4843		}
4844
4845		if (stream_wantsnewfs && flags->force &&
4846		    ((raw && !encrypted) || encrypted)) {
4847			zfs_close(zhp);
4848			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4849			    "zfs receive -F cannot be used to destroy an "
4850			    "encrypted filesystem or overwrite an "
4851			    "unencrypted one with an encrypted one"));
4852			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4853			goto out;
4854		}
4855
4856		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
4857		    (stream_wantsnewfs || stream_resumingnewfs)) {
4858			/* We can't do online recv in this case */
4859			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
4860			    flags->forceunmount ? MS_FORCE : 0);
4861			if (clp == NULL) {
4862				zfs_close(zhp);
4863				err = -1;
4864				goto out;
4865			}
4866			if (changelist_prefix(clp) != 0) {
4867				changelist_free(clp);
4868				zfs_close(zhp);
4869				err = -1;
4870				goto out;
4871			}
4872		}
4873
4874		/*
4875		 * If we are resuming a newfs, set newfs here so that we will
4876		 * mount it if the recv succeeds this time.  We can tell
4877		 * that it was a newfs on the first recv because the fs
4878		 * itself will be inconsistent (if the fs existed when we
4879		 * did the first recv, we would have received it into
4880		 * .../%recv).
4881		 */
4882		if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
4883			newfs = B_TRUE;
4884
4885		/* we want to know if we're zoned when validating -o|-x props */
4886		zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
4887
4888		/* may need this info later, get it now we have zhp around */
4889		if (zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0,
4890		    NULL, NULL, 0, B_TRUE) == 0)
4891			hastoken = B_TRUE;
4892
4893		/* gather existing properties on destination */
4894		origprops = fnvlist_alloc();
4895		fnvlist_merge(origprops, zhp->zfs_props);
4896		fnvlist_merge(origprops, zhp->zfs_user_props);
4897
4898		zfs_close(zhp);
4899	} else {
4900		zfs_handle_t *zhp;
4901
4902		/*
4903		 * Destination filesystem does not exist.  Therefore we better
4904		 * be creating a new filesystem (either from a full backup, or
4905		 * a clone).  It would therefore be invalid if the user
4906		 * specified only the pool name (i.e. if the destination name
4907		 * contained no slash character).
4908		 */
4909		cp = strrchr(name, '/');
4910
4911		if (!stream_wantsnewfs || cp == NULL) {
4912			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4913			    "destination '%s' does not exist"), name);
4914			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4915			goto out;
4916		}
4917
4918		/*
4919		 * Trim off the final dataset component so we perform the
4920		 * recvbackup ioctl to the filesystems's parent.
4921		 */
4922		*cp = '\0';
4923
4924		if (flags->isprefix && !flags->istail && !flags->dryrun &&
4925		    create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
4926			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4927			goto out;
4928		}
4929
4930		/* validate parent */
4931		zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
4932		if (zhp == NULL) {
4933			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4934			goto out;
4935		}
4936		if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
4937			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4938			    "parent '%s' is not a filesystem"), name);
4939			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4940			zfs_close(zhp);
4941			goto out;
4942		}
4943
4944		zfs_close(zhp);
4945
4946		newfs = B_TRUE;
4947		*cp = '/';
4948	}
4949
4950	if (flags->verbose) {
4951		(void) printf("%s %s%s stream of %s into %s\n",
4952		    flags->dryrun ? "would receive" : "receiving",
4953		    flags->heal ? " corrective" : "",
4954		    drrb->drr_fromguid ? "incremental" : "full",
4955		    drrb->drr_toname, destsnap);
4956		(void) fflush(stdout);
4957	}
4958
4959	/*
4960	 * If this is the top-level dataset, record it so we can use it
4961	 * for recursive operations later.
4962	 */
4963	if (top_zfs != NULL &&
4964	    (*top_zfs == NULL || strcmp(*top_zfs, name) == 0)) {
4965		toplevel = B_TRUE;
4966		if (*top_zfs == NULL)
4967			*top_zfs = zfs_strdup(hdl, name);
4968	}
4969
4970	if (drrb->drr_type == DMU_OST_ZVOL) {
4971		type = ZFS_TYPE_VOLUME;
4972	} else if (drrb->drr_type == DMU_OST_ZFS) {
4973		type = ZFS_TYPE_FILESYSTEM;
4974	} else {
4975		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4976		    "invalid record type: 0x%d"), drrb->drr_type);
4977		err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4978		goto out;
4979	}
4980	if ((err = zfs_setup_cmdline_props(hdl, type, name, zoned, recursive,
4981	    stream_wantsnewfs, raw, toplevel, rcvprops, cmdprops, origprops,
4982	    &oxprops, &wkeydata, &wkeylen, errbuf)) != 0)
4983		goto out;
4984
4985	/*
4986	 * When sending with properties (zfs send -p), the encryption property
4987	 * is not included because it is a SETONCE property and therefore
4988	 * treated as read only. However, we are always able to determine its
	 * value because raw sends will include it in the DRR_BEGIN payload
4990	 * and non-raw sends with properties are not allowed for encrypted
4991	 * datasets. Therefore, if this is a non-raw properties stream, we can
4992	 * infer that the value should be ZIO_CRYPT_OFF and manually add that
4993	 * to the received properties.
4994	 */
4995	if (stream_wantsnewfs && !raw && rcvprops != NULL &&
4996	    !nvlist_exists(cmdprops, zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) {
4997		if (oxprops == NULL)
4998			oxprops = fnvlist_alloc();
4999		fnvlist_add_uint64(oxprops,
5000		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF);
5001	}
5002
5003	if (flags->dryrun) {
5004		void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
5005
5006		/*
5007		 * We have read the DRR_BEGIN record, but we have
5008		 * not yet read the payload. For non-dryrun sends
5009		 * this will be done by the kernel, so we must
5010		 * emulate that here, before attempting to read
5011		 * more records.
5012		 */
5013		err = recv_read(hdl, infd, buf, drr->drr_payloadlen,
5014		    flags->byteswap, NULL);
5015		free(buf);
5016		if (err != 0)
5017			goto out;
5018
5019		err = recv_skip(hdl, infd, flags->byteswap);
5020		goto out;
5021	}
5022
5023	if (flags->heal) {
5024		err = ioctl_err = lzc_receive_with_heal(destsnap, rcvprops,
5025		    oxprops, wkeydata, wkeylen, origin, flags->force,
5026		    flags->heal, flags->resumable, raw, infd, drr_noswap, -1,
5027		    &read_bytes, &errflags, NULL, &prop_errors);
5028	} else {
5029		err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
5030		    oxprops, wkeydata, wkeylen, origin, flags->force,
5031		    flags->resumable, raw, infd, drr_noswap, -1, &read_bytes,
5032		    &errflags, NULL, &prop_errors);
5033	}
5034	ioctl_errno = ioctl_err;
5035	prop_errflags = errflags;
5036
5037	if (err == 0) {
5038		nvpair_t *prop_err = NULL;
5039
5040		while ((prop_err = nvlist_next_nvpair(prop_errors,
5041		    prop_err)) != NULL) {
5042			char tbuf[1024];
5043			zfs_prop_t prop;
5044			int intval;
5045
5046			prop = zfs_name_to_prop(nvpair_name(prop_err));
5047			(void) nvpair_value_int32(prop_err, &intval);
5048			if (strcmp(nvpair_name(prop_err),
5049			    ZPROP_N_MORE_ERRORS) == 0) {
5050				trunc_prop_errs(intval);
5051				break;
5052			} else if (snapname == NULL || finalsnap == NULL ||
5053			    strcmp(finalsnap, snapname) == 0 ||
5054			    strcmp(nvpair_name(prop_err),
5055			    zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
5056				/*
5057				 * Skip the special case of, for example,
5058				 * "refquota", errors on intermediate
5059				 * snapshots leading up to a final one.
5060				 * That's why we have all of the checks above.
5061				 *
5062				 * See zfs_ioctl.c's extract_delay_props() for
5063				 * a list of props which can fail on
5064				 * intermediate snapshots, but shouldn't
5065				 * affect the overall receive.
5066				 */
5067				(void) snprintf(tbuf, sizeof (tbuf),
5068				    dgettext(TEXT_DOMAIN,
5069				    "cannot receive %s property on %s"),
5070				    nvpair_name(prop_err), name);
5071				zfs_setprop_error(hdl, prop, intval, tbuf);
5072			}
5073		}
5074	}
5075
5076	if (err == 0 && snapprops_nvlist) {
5077		zfs_cmd_t zc = {"\0"};
5078
5079		(void) strlcpy(zc.zc_name, destsnap, sizeof (zc.zc_name));
5080		zc.zc_cookie = B_TRUE; /* received */
5081		zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist);
5082		(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
5083		zcmd_free_nvlists(&zc);
5084	}
5085	if (err == 0 && snapholds_nvlist) {
5086		nvpair_t *pair;
5087		nvlist_t *holds, *errors = NULL;
5088		int cleanup_fd = -1;
5089
5090		VERIFY(0 == nvlist_alloc(&holds, 0, KM_SLEEP));
5091		for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL);
5092		    pair != NULL;
5093		    pair = nvlist_next_nvpair(snapholds_nvlist, pair)) {
5094			fnvlist_add_string(holds, destsnap, nvpair_name(pair));
5095		}
5096		(void) lzc_hold(holds, cleanup_fd, &errors);
5097		fnvlist_free(snapholds_nvlist);
5098		fnvlist_free(holds);
5099	}
5100
5101	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
5102		/*
5103		 * It may be that this snapshot already exists,
5104		 * in which case we want to consume & ignore it
5105		 * rather than failing.
5106		 */
5107		avl_tree_t *local_avl;
5108		nvlist_t *local_nv, *fs;
5109		cp = strchr(destsnap, '@');
5110
5111		/*
5112		 * XXX Do this faster by just iterating over snaps in
5113		 * this fs.  Also if zc_value does not exist, we will
5114		 * get a strange "does not exist" error message.
5115		 */
5116		*cp = '\0';
5117		if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
5118		    B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE,
5119		    B_TRUE, &local_nv, &local_avl) == 0) {
5120			*cp = '@';
5121			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
5122			fsavl_destroy(local_avl);
5123			fnvlist_free(local_nv);
5124
5125			if (fs != NULL) {
5126				if (flags->verbose) {
5127					(void) printf("snap %s already exists; "
5128					    "ignoring\n", destsnap);
5129				}
5130				err = ioctl_err = recv_skip(hdl, infd,
5131				    flags->byteswap);
5132			}
5133		}
5134		*cp = '@';
5135	}
5136
5137	if (ioctl_err != 0) {
5138		switch (ioctl_errno) {
5139		case ENODEV:
5140			cp = strchr(destsnap, '@');
5141			*cp = '\0';
5142			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5143			    "most recent snapshot of %s does not\n"
5144			    "match incremental source"), destsnap);
5145			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
5146			*cp = '@';
5147			break;
5148		case ETXTBSY:
5149			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5150			    "destination %s has been modified\n"
5151			    "since most recent snapshot"), name);
5152			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
5153			break;
5154		case EACCES:
5155			if (flags->heal) {
5156				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5157				    "key must be loaded to do a non-raw "
5158				    "corrective recv on an encrypted "
5159				    "dataset."));
5160			} else if (raw && stream_wantsnewfs) {
5161				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5162				    "failed to create encryption key"));
5163			} else if (raw && !stream_wantsnewfs) {
5164				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5165				    "encryption key does not match "
5166				    "existing key"));
5167			} else {
5168				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5169				    "inherited key must be loaded"));
5170			}
5171			(void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
5172			break;
5173		case EEXIST:
5174			cp = strchr(destsnap, '@');
5175			if (newfs) {
5176				/* it's the containing fs that exists */
5177				*cp = '\0';
5178			}
5179			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5180			    "destination already exists"));
5181			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
5182			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
5183			    destsnap);
5184			*cp = '@';
5185			break;
5186		case EINVAL:
5187			if (embedded && !raw) {
5188				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5189				    "incompatible embedded data stream "
5190				    "feature with encrypted receive."));
5191			} else if (flags->resumable) {
5192				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5193				    "kernel modules must be upgraded to "
5194				    "receive this stream."));
5195			}
5196			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5197			break;
5198		case ECKSUM:
5199		case ZFS_ERR_STREAM_TRUNCATED:
5200			if (flags->heal)
5201				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5202				    "corrective receive was not able to "
5203				    "reconstruct the data needed for "
5204				    "healing."));
5205			else
5206				recv_ecksum_set_aux(hdl, destsnap,
5207				    flags->resumable, ioctl_err == ECKSUM);
5208			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5209			break;
5210		case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
5211			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5212			    "incremental send stream requires -L "
5213			    "(--large-block), to match previous receive."));
5214			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5215			break;
5216		case ENOTSUP:
5217			if (flags->heal)
5218				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5219				    "stream is not compatible with the "
5220				    "data in the pool."));
5221			else
5222				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5223				    "pool must be upgraded to receive this "
5224				    "stream."));
5225			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
5226			break;
5227		case ZFS_ERR_CRYPTO_NOTSUP:
5228			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5229			    "stream uses crypto parameters not compatible with "
5230			    "this pool"));
5231			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5232			break;
5233		case EDQUOT:
5234			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5235			    "destination %s space quota exceeded."), name);
5236			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
5237			break;
5238		case ZFS_ERR_FROM_IVSET_GUID_MISSING:
5239			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5240			    "IV set guid missing. See errata %u at "
5241			    "https://openzfs.github.io/openzfs-docs/msg/"
5242			    "ZFS-8000-ER."),
5243			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION);
5244			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5245			break;
5246		case ZFS_ERR_FROM_IVSET_GUID_MISMATCH:
5247			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5248			    "IV set guid mismatch. See the 'zfs receive' "
5249			    "man page section\n discussing the limitations "
5250			    "of raw encrypted send streams."));
5251			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5252			break;
5253		case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
5254			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5255			    "Spill block flag missing for raw send.\n"
5256			    "The zfs software on the sending system must "
5257			    "be updated."));
5258			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5259			break;
5260		case ZFS_ERR_RESUME_EXISTS:
5261			cp = strchr(destsnap, '@');
5262			if (newfs) {
5263				/* it's the containing fs that exists */
5264				*cp = '\0';
5265			}
5266			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5267			    "Resuming recv on existing dataset without force"));
5268			(void) zfs_error_fmt(hdl, EZFS_RESUME_EXISTS,
5269			    dgettext(TEXT_DOMAIN, "cannot resume recv %s"),
5270			    destsnap);
5271			*cp = '@';
5272			break;
5273		case E2BIG:
5274			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5275			    "zfs receive required kernel memory allocation "
5276			    "larger than the system can support. Please file "
5277			    "an issue at the OpenZFS issue tracker:\n"
5278			    "https://github.com/openzfs/zfs/issues/new"));
5279			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5280			break;
5281		case EBUSY:
5282			if (hastoken) {
5283				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5284				    "destination %s contains "
5285				    "partially-complete state from "
5286				    "\"zfs receive -s\"."), name);
5287				(void) zfs_error(hdl, EZFS_BUSY, errbuf);
5288				break;
5289			}
5290			zfs_fallthrough;
5291		default:
5292			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
5293		}
5294	}
5295
5296	/*
5297	 * Mount the target filesystem (if created).  Also mount any
5298	 * children of the target filesystem if we did a replication
5299	 * receive (indicated by stream_avl being non-NULL).
5300	 */
5301	if (clp) {
5302		if (!flags->nomount)
5303			err |= changelist_postfix(clp);
5304		changelist_free(clp);
5305	}
5306
5307	if ((newfs || stream_avl) && type == ZFS_TYPE_FILESYSTEM && !redacted)
5308		flags->domount = B_TRUE;
5309
5310	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
5311		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5312		    "failed to clear unreceived properties on %s"), name);
5313		(void) fprintf(stderr, "\n");
5314	}
5315	if (prop_errflags & ZPROP_ERR_NORESTORE) {
5316		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5317		    "failed to restore original properties on %s"), name);
5318		(void) fprintf(stderr, "\n");
5319	}
5320
5321	if (err || ioctl_err) {
5322		err = -1;
5323		goto out;
5324	}
5325
5326	if (flags->verbose) {
5327		char buf1[64];
5328		char buf2[64];
5329		uint64_t bytes = read_bytes;
5330		struct timespec delta;
5331		clock_gettime(CLOCK_MONOTONIC_RAW, &delta);
5332		if (begin_time.tv_nsec > delta.tv_nsec) {
5333			delta.tv_nsec =
5334			    1000000000 + delta.tv_nsec - begin_time.tv_nsec;
5335			delta.tv_sec -= 1;
5336		} else
5337			delta.tv_nsec -= begin_time.tv_nsec;
5338		delta.tv_sec -= begin_time.tv_sec;
5339		if (delta.tv_sec == 0 && delta.tv_nsec == 0)
5340			delta.tv_nsec = 1;
5341		double delta_f = delta.tv_sec + (delta.tv_nsec / 1e9);
5342		zfs_nicebytes(bytes, buf1, sizeof (buf1));
5343		zfs_nicebytes(bytes / delta_f, buf2, sizeof (buf2));
5344
5345		(void) printf("received %s stream in %.2f seconds (%s/sec)\n",
5346		    buf1, delta_f, buf2);
5347	}
5348
5349	err = 0;
5350out:
5351	if (prop_errors != NULL)
5352		fnvlist_free(prop_errors);
5353
5354	if (tmp_keylocation[0] != '\0') {
5355		fnvlist_add_string(rcvprops,
5356		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation);
5357	}
5358
5359	if (newprops)
5360		fnvlist_free(rcvprops);
5361
5362	fnvlist_free(oxprops);
5363	fnvlist_free(origprops);
5364
5365	return (err);
5366}
5367
5368/*
5369 * Check properties we were asked to override (both -o|-x)
5370 */
5371static boolean_t
5372zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
5373    const char *errbuf)
5374{
5375	nvpair_t *nvp = NULL;
5376	zfs_prop_t prop;
5377	const char *name;
5378
5379	while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
5380		name = nvpair_name(nvp);
5381		prop = zfs_name_to_prop(name);
5382
5383		if (prop == ZPROP_USERPROP) {
5384			if (!zfs_prop_user(name)) {
5385				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5386				    "%s: invalid property '%s'"), errbuf, name);
5387				return (B_FALSE);
5388			}
5389			continue;
5390		}
5391		/*
5392		 * "origin" is readonly but is used to receive datasets as
5393		 * clones so we don't raise an error here
5394		 */
5395		if (prop == ZFS_PROP_ORIGIN)
5396			continue;
5397
5398		/* encryption params have their own verification later */
5399		if (prop == ZFS_PROP_ENCRYPTION ||
5400		    zfs_prop_encryption_key_param(prop))
5401			continue;
5402
5403		/*
5404		 * cannot override readonly, set-once and other specific
5405		 * settable properties
5406		 */
5407		if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION ||
5408		    prop == ZFS_PROP_VOLSIZE) {
5409			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5410			    "%s: invalid property '%s'"), errbuf, name);
5411			return (B_FALSE);
5412		}
5413	}
5414
5415	return (B_TRUE);
5416}
5417
5418static int
5419zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
5420    const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
5421    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs,
5422    const char *finalsnap, nvlist_t *cmdprops)
5423{
5424	int err;
5425	dmu_replay_record_t drr, drr_noswap;
5426	struct drr_begin *drrb = &drr.drr_u.drr_begin;
5427	char errbuf[ERRBUFLEN];
5428	zio_cksum_t zcksum = { { 0 } };
5429	uint64_t featureflags;
5430	int hdrtype;
5431
5432	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
5433	    "cannot receive"));
5434
5435	/* check cmdline props, raise an error if they cannot be received */
5436	if (!zfs_receive_checkprops(hdl, cmdprops, errbuf))
5437		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
5438
5439	if (flags->isprefix &&
5440	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
5441		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
5442		    "(%s) does not exist"), tosnap);
5443		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5444	}
5445	if (originsnap &&
5446	    !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
5447		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
5448		    "(%s) does not exist"), originsnap);
5449		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5450	}
5451
5452	/* read in the BEGIN record */
5453	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
5454	    &zcksum)))
5455		return (err);
5456
5457	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
5458		/* It's the double end record at the end of a package */
5459		return (ENODATA);
5460	}
5461
5462	/* the kernel needs the non-byteswapped begin record */
5463	drr_noswap = drr;
5464
5465	flags->byteswap = B_FALSE;
5466	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
5467		/*
5468		 * We computed the checksum in the wrong byteorder in
5469		 * recv_read() above; do it again correctly.
5470		 */
5471		memset(&zcksum, 0, sizeof (zio_cksum_t));
5472		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
5473		flags->byteswap = B_TRUE;
5474
5475		drr.drr_type = BSWAP_32(drr.drr_type);
5476		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
5477		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
5478		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
5479		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
5480		drrb->drr_type = BSWAP_32(drrb->drr_type);
5481		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
5482		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
5483		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
5484	}
5485
5486	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
5487		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5488		    "stream (bad magic number)"));
5489		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5490	}
5491
5492	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
5493	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
5494
5495	if (!DMU_STREAM_SUPPORTED(featureflags) ||
5496	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
5497		/*
5498		 * Let's be explicit about this one, since rather than
5499		 * being a new feature we can't know, it's an old
5500		 * feature we dropped.
5501		 */
5502		if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
5503			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5504			    "stream has deprecated feature: dedup, try "
5505			    "'zstream redup [send in a file] | zfs recv "
5506			    "[...]'"));
5507		} else {
5508			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5509			    "stream has unsupported feature, feature flags = "
5510			    "%llx (unknown flags = %llx)"),
5511			    (u_longlong_t)featureflags,
5512			    (u_longlong_t)((featureflags) &
5513			    ~DMU_BACKUP_FEATURE_MASK));
5514		}
5515		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5516	}
5517
5518	/* Holds feature is set once in the compound stream header. */
5519	if (featureflags & DMU_BACKUP_FEATURE_HOLDS)
5520		flags->holds = B_TRUE;
5521
5522	if (strchr(drrb->drr_toname, '@') == NULL) {
5523		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5524		    "stream (bad snapshot name)"));
5525		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5526	}
5527
5528	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
5529		char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
5530		if (sendfs == NULL) {
5531			/*
5532			 * We were not called from zfs_receive_package(). Get
5533			 * the fs specified by 'zfs send'.
5534			 */
5535			char *cp;
5536			(void) strlcpy(nonpackage_sendfs,
5537			    drr.drr_u.drr_begin.drr_toname,
5538			    sizeof (nonpackage_sendfs));
5539			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
5540				*cp = '\0';
5541			sendfs = nonpackage_sendfs;
5542			VERIFY(finalsnap == NULL);
5543		}
5544		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
5545		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
5546		    finalsnap, cmdprops));
5547	} else {
5548		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
5549		    DMU_COMPOUNDSTREAM);
5550		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
5551		    &zcksum, top_zfs, cmdprops));
5552	}
5553}
5554
5555/*
5556 * Restores a backup of tosnap from the file descriptor specified by infd.
5557 * Return 0 on total success, -2 if some things couldn't be
5558 * destroyed/renamed/promoted, -1 if some things couldn't be received.
5559 * (-1 will override -2, if -1 and the resumable flag was specified the
5560 * transfer can be resumed if the sending side supports it).
5561 */
5562int
5563zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
5564    recvflags_t *flags, int infd, avl_tree_t *stream_avl)
5565{
5566	char *top_zfs = NULL;
5567	int err;
5568	struct stat sb;
5569	const char *originsnap = NULL;
5570
5571	/*
5572	 * The only way fstat can fail is if we do not have a valid file
5573	 * descriptor.
5574	 */
5575	if (fstat(infd, &sb) == -1) {
5576		perror("fstat");
5577		return (-2);
5578	}
5579
5580	if (props) {
5581		err = nvlist_lookup_string(props, "origin", &originsnap);
5582		if (err && err != ENOENT)
5583			return (err);
5584	}
5585
5586	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
5587	    stream_avl, &top_zfs, NULL, props);
5588
5589	if (err == 0 && !flags->nomount && flags->domount && top_zfs) {
5590		zfs_handle_t *zhp = NULL;
5591		prop_changelist_t *clp = NULL;
5592
5593		zhp = zfs_open(hdl, top_zfs,
5594		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
5595		if (zhp == NULL) {
5596			err = -1;
5597			goto out;
5598		} else {
5599			if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
5600				zfs_close(zhp);
5601				goto out;
5602			}
5603
5604			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
5605			    CL_GATHER_MOUNT_ALWAYS,
5606			    flags->forceunmount ? MS_FORCE : 0);
5607			zfs_close(zhp);
5608			if (clp == NULL) {
5609				err = -1;
5610				goto out;
5611			}
5612
5613			/* mount and share received datasets */
5614			err = changelist_postfix(clp);
5615			changelist_free(clp);
5616			if (err != 0)
5617				err = -1;
5618		}
5619	}
5620
5621out:
5622	if (top_zfs)
5623		free(top_zfs);
5624
5625	return (err);
5626}
5627