libzfs_sendrecv.c revision 268649
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved.
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 */
30
31#include <assert.h>
32#include <ctype.h>
33#include <errno.h>
34#include <libintl.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <strings.h>
38#include <unistd.h>
39#include <stddef.h>
40#include <fcntl.h>
41#include <sys/param.h>
42#include <sys/mount.h>
43#include <pthread.h>
44#include <umem.h>
45#include <time.h>
46
47#include <libzfs.h>
48#include <libzfs_core.h>
49
50#include "zfs_namecheck.h"
51#include "zfs_prop.h"
52#include "zfs_fletcher.h"
53#include "libzfs_impl.h"
54#include <sha2.h>
55#include <sys/zio_checksum.h>
56#include <sys/ddt.h>
57
#ifdef __FreeBSD__
/* Declared only on FreeBSD builds; the definition lives outside this file. */
extern int zfs_ioctl_version;
#endif

/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/* We need to use something for ENODATA. */
#define	ENODATA	EIDRM

/*
 * Forward declaration; the definition is not in this part of the file.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
    int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);

/*
 * All-zero checksum.  cksummer() compares a write record's key checksum
 * against this value to decide whether it must compute one itself.
 */
static const zio_cksum_t zero_cksum = { 0 };
71
/*
 * Arguments handed to the cksummer() thread.
 */
typedef struct dedup_arg {
	/* fd the raw send stream is read from (wrapped with fdopen()) */
	int	inputfd;
	/* fd the rewritten stream is written to */
	int	outputfd;
	/* library handle passed on to ddt_update() for error reporting */
	libzfs_handle_t  *dedup_hdl;
} dedup_arg_t;
77
/*
 * Arguments for send_progress_thread().
 */
typedef struct progress_arg {
	/* snapshot whose send progress is reported */
	zfs_handle_t *pa_zhp;
	/* fd handed to ZFS_IOC_SEND_PROGRESS through zc_cookie */
	int pa_fd;
	/* when set: tab-separated output and no header line */
	boolean_t pa_parsable;
} progress_arg_t;
83
/*
 * Identifies where a block was first written in the stream.  cksummer()
 * fills it from a DRR_WRITE record's drr_toguid, drr_object and
 * drr_offset.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
89
/*
 * One dedup table entry.  Entries that hash to the same bucket are
 * chained through dde_next; (dde_chksum, dde_prop) is the lookup key and
 * dde_ref is the reference handed back on a match (see ddt_update()).
 */
typedef struct dedup_entry {
	struct dedup_entry	*dde_next;
	zio_cksum_t dde_chksum;
	uint64_t dde_prop;
	dataref_t dde_ref;
} dedup_entry_t;
96
/*
 * Sizing limits used by cksummer(): the dedup table limit is the larger
 * of MAX_DDT_PHYSMEM_PERCENT percent of physical memory and
 * SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define	MAX_DDT_PHYSMEM_PERCENT		20
#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128

/*
 * Hash table of dedup entries built while a stream is being rewritten.
 */
typedef struct dedup_table {
	/* array of bucket heads, indexed by the low bits of the checksum */
	dedup_entry_t	**dedup_hash_array;
	/* umem cache that dedup_entry_t allocations come from */
	umem_cache_t	*ddecache;
	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
	/* incremented for every entry added by ddt_hash_append() */
	uint64_t	ddt_count;
	/* number of low-order bits of zc_word[0] used as the bucket index */
	int		numhashbits;
	/* set once the "table full" message has been recorded */
	boolean_t	ddt_full;
} dedup_table_t;
109
/*
 * Return the 1-based position of the most significant set bit of n,
 * i.e. how many right shifts are needed to reduce n to zero.
 * Returns 0 when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
	int bits = 0;

	while (n != 0) {
		n >>= 1;
		bits++;
	}
	return (bits);
}
119
/*
 * Read one object of len bytes from stream into buf.
 *
 * Returns 1 when the whole object was read and 0 otherwise (short read,
 * end of file or error), exactly as fread() does for an item count of 1.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
130
131static void
132ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
133    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
134{
135	dedup_entry_t	*dde;
136
137	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
138		if (ddt->ddt_full == B_FALSE) {
139			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
140			    "Dedup table full.  Deduplication will continue "
141			    "with existing table entries"));
142			ddt->ddt_full = B_TRUE;
143		}
144		return;
145	}
146
147	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
148	    != NULL) {
149		assert(*ddepp == NULL);
150		dde->dde_next = NULL;
151		dde->dde_chksum = *cs;
152		dde->dde_prop = prop;
153		dde->dde_ref = *dr;
154		*ddepp = dde;
155		ddt->cur_ddt_size += sizeof (dedup_entry_t);
156		ddt->ddt_count++;
157	}
158}
159
160/*
161 * Using the specified dedup table, do a lookup for an entry with
162 * the checksum cs.  If found, return the block's reference info
163 * in *dr. Otherwise, insert a new entry in the dedup table, using
164 * the reference information specified by *dr.
165 *
166 * return value:  true - entry was found
167 *		  false - entry was not found
168 */
169static boolean_t
170ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
171    uint64_t prop, dataref_t *dr)
172{
173	uint32_t hashcode;
174	dedup_entry_t **ddepp;
175
176	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
177
178	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
179	    ddepp = &((*ddepp)->dde_next)) {
180		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
181		    (*ddepp)->dde_prop == prop) {
182			*dr = (*ddepp)->dde_ref;
183			return (B_TRUE);
184		}
185	}
186	ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
187	return (B_FALSE);
188}
189
190static int
191cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
192{
193	fletcher_4_incremental_native(buf, len, zc);
194	return (write(outfd, buf, len));
195}
196
197/*
198 * This function is started in a separate thread when the dedup option
199 * has been requested.  The main send thread determines the list of
200 * snapshots to be included in the send stream and makes the ioctl calls
201 * for each one.  But instead of having the ioctl send the output to the
202 * the output fd specified by the caller of zfs_send()), the
203 * ioctl is told to direct the output to a pipe, which is read by the
204 * alternate thread running THIS function.  This function does the
205 * dedup'ing by:
206 *  1. building a dedup table (the DDT)
207 *  2. doing checksums on each data block and inserting a record in the DDT
208 *  3. looking for matching checksums, and
209 *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
210 *      a duplicate block is found.
211 * The output of this function then goes to the output fd requested
212 * by the caller of zfs_send().
213 */
214static void *
215cksummer(void *arg)
216{
217	dedup_arg_t *dda = arg;
218	char *buf = malloc(1<<20);
219	dmu_replay_record_t thedrr;
220	dmu_replay_record_t *drr = &thedrr;
221	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
222	struct drr_end *drre = &thedrr.drr_u.drr_end;
223	struct drr_object *drro = &thedrr.drr_u.drr_object;
224	struct drr_write *drrw = &thedrr.drr_u.drr_write;
225	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
226	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
227	FILE *ofp;
228	int outfd;
229	dmu_replay_record_t wbr_drr = {0};
230	struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
231	dedup_table_t ddt;
232	zio_cksum_t stream_cksum;
233	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
234	uint64_t numbuckets;
235
236	ddt.max_ddt_size =
237	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
238	    SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
239
240	numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
241
242	/*
243	 * numbuckets must be a power of 2.  Increase number to
244	 * a power of 2 if necessary.
245	 */
246	if (!ISP2(numbuckets))
247		numbuckets = 1 << high_order_bit(numbuckets);
248
249	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
250	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
251	    NULL, NULL, NULL, NULL, NULL, 0);
252	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
253	ddt.numhashbits = high_order_bit(numbuckets) - 1;
254	ddt.ddt_full = B_FALSE;
255
256	/* Initialize the write-by-reference block. */
257	wbr_drr.drr_type = DRR_WRITE_BYREF;
258	wbr_drr.drr_payloadlen = 0;
259
260	outfd = dda->outputfd;
261	ofp = fdopen(dda->inputfd, "r");
262	while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
263
264		switch (drr->drr_type) {
265		case DRR_BEGIN:
266		{
267			int	fflags;
268			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
269
270			/* set the DEDUP feature flag for this stream */
271			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
272			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
273			    DMU_BACKUP_FEATURE_DEDUPPROPS);
274			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
275
276			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
277			    &stream_cksum, outfd) == -1)
278				goto out;
279			if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
280			    DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
281				int sz = drr->drr_payloadlen;
282
283				if (sz > 1<<20) {
284					free(buf);
285					buf = malloc(sz);
286				}
287				(void) ssread(buf, sz, ofp);
288				if (ferror(stdin))
289					perror("fread");
290				if (cksum_and_write(buf, sz, &stream_cksum,
291				    outfd) == -1)
292					goto out;
293			}
294			break;
295		}
296
297		case DRR_END:
298		{
299			/* use the recalculated checksum */
300			ZIO_SET_CHECKSUM(&drre->drr_checksum,
301			    stream_cksum.zc_word[0], stream_cksum.zc_word[1],
302			    stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
303			if ((write(outfd, drr,
304			    sizeof (dmu_replay_record_t))) == -1)
305				goto out;
306			break;
307		}
308
309		case DRR_OBJECT:
310		{
311			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
312			    &stream_cksum, outfd) == -1)
313				goto out;
314			if (drro->drr_bonuslen > 0) {
315				(void) ssread(buf,
316				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
317				    ofp);
318				if (cksum_and_write(buf,
319				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
320				    &stream_cksum, outfd) == -1)
321					goto out;
322			}
323			break;
324		}
325
326		case DRR_SPILL:
327		{
328			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
329			    &stream_cksum, outfd) == -1)
330				goto out;
331			(void) ssread(buf, drrs->drr_length, ofp);
332			if (cksum_and_write(buf, drrs->drr_length,
333			    &stream_cksum, outfd) == -1)
334				goto out;
335			break;
336		}
337
338		case DRR_FREEOBJECTS:
339		{
340			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
341			    &stream_cksum, outfd) == -1)
342				goto out;
343			break;
344		}
345
346		case DRR_WRITE:
347		{
348			dataref_t	dataref;
349
350			(void) ssread(buf, drrw->drr_length, ofp);
351
352			/*
353			 * Use the existing checksum if it's dedup-capable,
354			 * else calculate a SHA256 checksum for it.
355			 */
356
357			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
358			    zero_cksum) ||
359			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
360				SHA256_CTX	ctx;
361				zio_cksum_t	tmpsha256;
362
363				SHA256Init(&ctx);
364				SHA256Update(&ctx, buf, drrw->drr_length);
365				SHA256Final(&tmpsha256, &ctx);
366				drrw->drr_key.ddk_cksum.zc_word[0] =
367				    BE_64(tmpsha256.zc_word[0]);
368				drrw->drr_key.ddk_cksum.zc_word[1] =
369				    BE_64(tmpsha256.zc_word[1]);
370				drrw->drr_key.ddk_cksum.zc_word[2] =
371				    BE_64(tmpsha256.zc_word[2]);
372				drrw->drr_key.ddk_cksum.zc_word[3] =
373				    BE_64(tmpsha256.zc_word[3]);
374				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
375				drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
376			}
377
378			dataref.ref_guid = drrw->drr_toguid;
379			dataref.ref_object = drrw->drr_object;
380			dataref.ref_offset = drrw->drr_offset;
381
382			if (ddt_update(dda->dedup_hdl, &ddt,
383			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
384			    &dataref)) {
385				/* block already present in stream */
386				wbr_drrr->drr_object = drrw->drr_object;
387				wbr_drrr->drr_offset = drrw->drr_offset;
388				wbr_drrr->drr_length = drrw->drr_length;
389				wbr_drrr->drr_toguid = drrw->drr_toguid;
390				wbr_drrr->drr_refguid = dataref.ref_guid;
391				wbr_drrr->drr_refobject =
392				    dataref.ref_object;
393				wbr_drrr->drr_refoffset =
394				    dataref.ref_offset;
395
396				wbr_drrr->drr_checksumtype =
397				    drrw->drr_checksumtype;
398				wbr_drrr->drr_checksumflags =
399				    drrw->drr_checksumtype;
400				wbr_drrr->drr_key.ddk_cksum =
401				    drrw->drr_key.ddk_cksum;
402				wbr_drrr->drr_key.ddk_prop =
403				    drrw->drr_key.ddk_prop;
404
405				if (cksum_and_write(&wbr_drr,
406				    sizeof (dmu_replay_record_t), &stream_cksum,
407				    outfd) == -1)
408					goto out;
409			} else {
410				/* block not previously seen */
411				if (cksum_and_write(drr,
412				    sizeof (dmu_replay_record_t), &stream_cksum,
413				    outfd) == -1)
414					goto out;
415				if (cksum_and_write(buf,
416				    drrw->drr_length,
417				    &stream_cksum, outfd) == -1)
418					goto out;
419			}
420			break;
421		}
422
423		case DRR_WRITE_EMBEDDED:
424		{
425			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
426			    &stream_cksum, outfd) == -1)
427				goto out;
428			(void) ssread(buf,
429			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
430			if (cksum_and_write(buf,
431			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
432			    &stream_cksum, outfd) == -1)
433				goto out;
434			break;
435		}
436
437		case DRR_FREE:
438		{
439			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
440			    &stream_cksum, outfd) == -1)
441				goto out;
442			break;
443		}
444
445		default:
446			(void) printf("INVALID record type 0x%x\n",
447			    drr->drr_type);
448			/* should never happen, so assert */
449			assert(B_FALSE);
450		}
451	}
452out:
453	umem_cache_destroy(ddt.ddecache);
454	free(ddt.dedup_hash_array);
455	free(buf);
456	(void) fclose(ofp);
457
458	return (NULL);
459}
460
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 */

/*
 * One tree node per snapshot, keyed by the snapshot's GUID.
 * fsavl_destroy() frees only the node itself, so fn_nvfs and
 * fn_snapname refer to storage that is not owned by the node.
 */
typedef struct fsavl_node {
	/* AVL linkage */
	avl_node_t fn_node;
	/* nvlist describing the filesystem that contains the snapshot */
	nvlist_t *fn_nvfs;
	/* name of the snapshot's nvpair in the "snaps" nvlist */
	char *fn_snapname;
	/* snapshot GUID; the only field fsavl_compare() looks at */
	uint64_t fn_guid;
} fsavl_node_t;
470
471static int
472fsavl_compare(const void *arg1, const void *arg2)
473{
474	const fsavl_node_t *fn1 = arg1;
475	const fsavl_node_t *fn2 = arg2;
476
477	if (fn1->fn_guid > fn2->fn_guid)
478		return (+1);
479	else if (fn1->fn_guid < fn2->fn_guid)
480		return (-1);
481	else
482		return (0);
483}
484
485/*
486 * Given the GUID of a snapshot, find its containing filesystem and
487 * (optionally) name.
488 */
489static nvlist_t *
490fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
491{
492	fsavl_node_t fn_find;
493	fsavl_node_t *fn;
494
495	fn_find.fn_guid = snapguid;
496
497	fn = avl_find(avl, &fn_find, NULL);
498	if (fn) {
499		if (snapname)
500			*snapname = fn->fn_snapname;
501		return (fn->fn_nvfs);
502	}
503	return (NULL);
504}
505
506static void
507fsavl_destroy(avl_tree_t *avl)
508{
509	fsavl_node_t *fn;
510	void *cookie;
511
512	if (avl == NULL)
513		return;
514
515	cookie = NULL;
516	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
517		free(fn);
518	avl_destroy(avl);
519	free(avl);
520}
521
522/*
523 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
524 */
525static avl_tree_t *
526fsavl_create(nvlist_t *fss)
527{
528	avl_tree_t *fsavl;
529	nvpair_t *fselem = NULL;
530
531	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
532		return (NULL);
533
534	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
535	    offsetof(fsavl_node_t, fn_node));
536
537	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
538		nvlist_t *nvfs, *snaps;
539		nvpair_t *snapelem = NULL;
540
541		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
542		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
543
544		while ((snapelem =
545		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
546			fsavl_node_t *fn;
547			uint64_t guid;
548
549			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
550			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
551				fsavl_destroy(fsavl);
552				return (NULL);
553			}
554			fn->fn_nvfs = nvfs;
555			fn->fn_snapname = nvpair_name(snapelem);
556			fn->fn_guid = guid;
557
558			/*
559			 * Note: if there are multiple snaps with the
560			 * same GUID, we ignore all but one.
561			 */
562			if (avl_find(fsavl, fn, NULL) == NULL)
563				avl_add(fsavl, fn);
564			else
565				free(fn);
566		}
567	}
568
569	return (fsavl);
570}
571
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 */

/*
 * State threaded through send_iterate_fs() / send_iterate_snap() while
 * the header nvlist is being built.
 */
typedef struct send_data {
	/*
	 * Value stored as "parentfromsnap" for the filesystem being
	 * visited.  It is reset to 0 and recomputed by send_iterate_snap()
	 * before descending into children, and restored afterwards.
	 */
	uint64_t parent_fromsnap_guid;
	/* snapshot name -> guid for the filesystem being visited */
	nvlist_t *parent_snaps;
	/* the result: one nvlist per filesystem, keyed by guid string */
	nvlist_t *fss;
	/* snapshot name -> property nvlist for the filesystem being visited */
	nvlist_t *snapprops;
	/* short snapshot names; either may be NULL */
	const char *fromsnap;
	const char *tosnap;
	/* descend into child filesystems when set */
	boolean_t recursive;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
607
608static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
609
610static int
611send_iterate_snap(zfs_handle_t *zhp, void *arg)
612{
613	send_data_t *sd = arg;
614	uint64_t guid = zhp->zfs_dmustats.dds_guid;
615	char *snapname;
616	nvlist_t *nv;
617
618	snapname = strrchr(zhp->zfs_name, '@')+1;
619
620	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
621	/*
622	 * NB: if there is no fromsnap here (it's a newly created fs in
623	 * an incremental replication), we will substitute the tosnap.
624	 */
625	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
626	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
627	    strcmp(snapname, sd->tosnap) == 0)) {
628		sd->parent_fromsnap_guid = guid;
629	}
630
631	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
632	send_iterate_prop(zhp, nv);
633	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
634	nvlist_free(nv);
635
636	zfs_close(zhp);
637	return (0);
638}
639
640static void
641send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
642{
643	nvpair_t *elem = NULL;
644
645	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
646		char *propname = nvpair_name(elem);
647		zfs_prop_t prop = zfs_name_to_prop(propname);
648		nvlist_t *propnv;
649
650		if (!zfs_prop_user(propname)) {
651			/*
652			 * Realistically, this should never happen.  However,
653			 * we want the ability to add DSL properties without
654			 * needing to make incompatible version changes.  We
655			 * need to ignore unknown properties to allow older
656			 * software to still send datasets containing these
657			 * properties, with the unknown properties elided.
658			 */
659			if (prop == ZPROP_INVAL)
660				continue;
661
662			if (zfs_prop_readonly(prop))
663				continue;
664		}
665
666		verify(nvpair_value_nvlist(elem, &propnv) == 0);
667		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
668		    prop == ZFS_PROP_REFQUOTA ||
669		    prop == ZFS_PROP_REFRESERVATION) {
670			char *source;
671			uint64_t value;
672			verify(nvlist_lookup_uint64(propnv,
673			    ZPROP_VALUE, &value) == 0);
674			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
675				continue;
676			/*
677			 * May have no source before SPA_VERSION_RECVD_PROPS,
678			 * but is still modifiable.
679			 */
680			if (nvlist_lookup_string(propnv,
681			    ZPROP_SOURCE, &source) == 0) {
682				if ((strcmp(source, zhp->zfs_name) != 0) &&
683				    (strcmp(source,
684				    ZPROP_SOURCE_VAL_RECVD) != 0))
685					continue;
686			}
687		} else {
688			char *source;
689			if (nvlist_lookup_string(propnv,
690			    ZPROP_SOURCE, &source) != 0)
691				continue;
692			if ((strcmp(source, zhp->zfs_name) != 0) &&
693			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
694				continue;
695		}
696
697		if (zfs_prop_user(propname) ||
698		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
699			char *value;
700			verify(nvlist_lookup_string(propnv,
701			    ZPROP_VALUE, &value) == 0);
702			VERIFY(0 == nvlist_add_string(nv, propname, value));
703		} else {
704			uint64_t value;
705			verify(nvlist_lookup_uint64(propnv,
706			    ZPROP_VALUE, &value) == 0);
707			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
708		}
709	}
710}
711
712/*
713 * recursively generate nvlists describing datasets.  See comment
714 * for the data structure send_data_t above for description of contents
715 * of the nvlist.
716 */
717static int
718send_iterate_fs(zfs_handle_t *zhp, void *arg)
719{
720	send_data_t *sd = arg;
721	nvlist_t *nvfs, *nv;
722	int rv = 0;
723	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
724	uint64_t guid = zhp->zfs_dmustats.dds_guid;
725	char guidstring[64];
726
727	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
728	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
729	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
730	    sd->parent_fromsnap_guid));
731
732	if (zhp->zfs_dmustats.dds_origin[0]) {
733		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
734		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
735		if (origin == NULL)
736			return (-1);
737		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
738		    origin->zfs_dmustats.dds_guid));
739	}
740
741	/* iterate over props */
742	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
743	send_iterate_prop(zhp, nv);
744	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
745	nvlist_free(nv);
746
747	/* iterate over snaps, and set sd->parent_fromsnap_guid */
748	sd->parent_fromsnap_guid = 0;
749	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
750	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
751	(void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
752	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
753	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
754	nvlist_free(sd->parent_snaps);
755	nvlist_free(sd->snapprops);
756
757	/* add this fs to nvlist */
758	(void) snprintf(guidstring, sizeof (guidstring),
759	    "0x%llx", (longlong_t)guid);
760	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
761	nvlist_free(nvfs);
762
763	/* iterate over children */
764	if (sd->recursive)
765		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
766
767	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
768
769	zfs_close(zhp);
770	return (rv);
771}
772
773static int
774gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
775    const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
776{
777	zfs_handle_t *zhp;
778	send_data_t sd = { 0 };
779	int error;
780
781	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
782	if (zhp == NULL)
783		return (EZFS_BADTYPE);
784
785	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
786	sd.fromsnap = fromsnap;
787	sd.tosnap = tosnap;
788	sd.recursive = recursive;
789
790	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
791		nvlist_free(sd.fss);
792		if (avlp != NULL)
793			*avlp = NULL;
794		*nvlp = NULL;
795		return (error);
796	}
797
798	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
799		nvlist_free(sd.fss);
800		*nvlp = NULL;
801		return (EZFS_NOMEM);
802	}
803
804	*nvlp = sd.fss;
805	return (0);
806}
807
/*
 * Routines specific to "zfs send"
 */

/*
 * State shared by the per-snapshot send callback dump_snapshot().
 * Fields without a comment are not used in the code visible alongside
 * this definition.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	/* short name of the last snapshot processed; "" if none yet */
	char prevsnap[ZFS_MAXNAMELEN];
	/* ZFS_PROP_OBJSETID of the fromsnap; used as the incremental source */
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t verbose, dryrun, parsable, progress, embed_data;
	/* fd the stream is sent to */
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/* snapshot name -> hold tag; NULL when no holds are wanted */
	nvlist_t *snapholds;
	/* guid-keyed snapshot tree consulted through fsavl_find() */
	avl_tree_t *fsavl;
	/* optional callback; a B_FALSE result excludes the snapshot */
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	/* optional nvlist that dump_ioctl() adds per-snapshot entries to */
	nvlist_t *debugnv;
	/* tag recorded in snapholds by gather_holds() */
	char holdtag[ZFS_MAXNAMELEN];
	int cleanup_fd;
	/* running total of the size estimates (verbose mode) */
	uint64_t size;
} send_dump_data_t;
831
832static int
833estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
834    boolean_t fromorigin, uint64_t *sizep)
835{
836	zfs_cmd_t zc = { 0 };
837	libzfs_handle_t *hdl = zhp->zfs_hdl;
838
839	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
840	assert(fromsnap_obj == 0 || !fromorigin);
841
842	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
843	zc.zc_obj = fromorigin;
844	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
845	zc.zc_fromobj = fromsnap_obj;
846	zc.zc_guid = 1;  /* estimate flag */
847
848	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
849		char errbuf[1024];
850		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
851		    "warning: cannot estimate space for '%s'"), zhp->zfs_name);
852
853		switch (errno) {
854		case EXDEV:
855			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
856			    "not an earlier snapshot from the same fs"));
857			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
858
859		case ENOENT:
860			if (zfs_dataset_exists(hdl, zc.zc_name,
861			    ZFS_TYPE_SNAPSHOT)) {
862				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
863				    "incremental source (@%s) does not exist"),
864				    zc.zc_value);
865			}
866			return (zfs_error(hdl, EZFS_NOENT, errbuf));
867
868		case EDQUOT:
869		case EFBIG:
870		case EIO:
871		case ENOLINK:
872		case ENOSPC:
873		case ENXIO:
874		case EPIPE:
875		case ERANGE:
876		case EFAULT:
877		case EROFS:
878			zfs_error_aux(hdl, strerror(errno));
879			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
880
881		default:
882			return (zfs_standard_error(hdl, errno, errbuf));
883		}
884	}
885
886	*sizep = zc.zc_objset_type;
887
888	return (0);
889}
890
891/*
892 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
893 * NULL) to the file descriptor specified by outfd.
894 */
895static int
896dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
897    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
898    nvlist_t *debugnv)
899{
900	zfs_cmd_t zc = { 0 };
901	libzfs_handle_t *hdl = zhp->zfs_hdl;
902	nvlist_t *thisdbg;
903
904	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
905	assert(fromsnap_obj == 0 || !fromorigin);
906
907	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
908	zc.zc_cookie = outfd;
909	zc.zc_obj = fromorigin;
910	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
911	zc.zc_fromobj = fromsnap_obj;
912	zc.zc_flags = flags;
913
914	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
915	if (fromsnap && fromsnap[0] != '\0') {
916		VERIFY(0 == nvlist_add_string(thisdbg,
917		    "fromsnap", fromsnap));
918	}
919
920	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
921		char errbuf[1024];
922		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
923		    "warning: cannot send '%s'"), zhp->zfs_name);
924
925		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
926		if (debugnv) {
927			VERIFY(0 == nvlist_add_nvlist(debugnv,
928			    zhp->zfs_name, thisdbg));
929		}
930		nvlist_free(thisdbg);
931
932		switch (errno) {
933		case EXDEV:
934			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
935			    "not an earlier snapshot from the same fs"));
936			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
937
938		case ENOENT:
939			if (zfs_dataset_exists(hdl, zc.zc_name,
940			    ZFS_TYPE_SNAPSHOT)) {
941				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
942				    "incremental source (@%s) does not exist"),
943				    zc.zc_value);
944			}
945			return (zfs_error(hdl, EZFS_NOENT, errbuf));
946
947		case EDQUOT:
948		case EFBIG:
949		case EIO:
950		case ENOLINK:
951		case ENOSPC:
952#ifdef sun
953		case ENOSTR:
954#endif
955		case ENXIO:
956		case EPIPE:
957		case ERANGE:
958		case EFAULT:
959		case EROFS:
960			zfs_error_aux(hdl, strerror(errno));
961			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
962
963		default:
964			return (zfs_standard_error(hdl, errno, errbuf));
965		}
966	}
967
968	if (debugnv)
969		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
970	nvlist_free(thisdbg);
971
972	return (0);
973}
974
975static void
976gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
977{
978	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
979
980	/*
981	 * zfs_send() only sets snapholds for sends that need them,
982	 * e.g. replication and doall.
983	 */
984	if (sdd->snapholds == NULL)
985		return;
986
987	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
988}
989
990static void *
991send_progress_thread(void *arg)
992{
993	progress_arg_t *pa = arg;
994
995	zfs_cmd_t zc = { 0 };
996	zfs_handle_t *zhp = pa->pa_zhp;
997	libzfs_handle_t *hdl = zhp->zfs_hdl;
998	unsigned long long bytes;
999	char buf[16];
1000
1001	time_t t;
1002	struct tm *tm;
1003
1004	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1005	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1006
1007	if (!pa->pa_parsable)
1008		(void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1009
1010	/*
1011	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1012	 */
1013	for (;;) {
1014		(void) sleep(1);
1015
1016		zc.zc_cookie = pa->pa_fd;
1017		if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1018			return ((void *)-1);
1019
1020		(void) time(&t);
1021		tm = localtime(&t);
1022		bytes = zc.zc_cookie;
1023
1024		if (pa->pa_parsable) {
1025			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1026			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1027			    bytes, zhp->zfs_name);
1028		} else {
1029			zfs_nicenum(bytes, buf, sizeof (buf));
1030			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1031			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1032			    buf, zhp->zfs_name);
1033		}
1034	}
1035}
1036
1037static int
1038dump_snapshot(zfs_handle_t *zhp, void *arg)
1039{
1040	send_dump_data_t *sdd = arg;
1041	progress_arg_t pa = { 0 };
1042	pthread_t tid;
1043	char *thissnap;
1044	int err;
1045	boolean_t isfromsnap, istosnap, fromorigin;
1046	boolean_t exclude = B_FALSE;
1047
1048	err = 0;
1049	thissnap = strchr(zhp->zfs_name, '@') + 1;
1050	isfromsnap = (sdd->fromsnap != NULL &&
1051	    strcmp(sdd->fromsnap, thissnap) == 0);
1052
1053	if (!sdd->seenfrom && isfromsnap) {
1054		gather_holds(zhp, sdd);
1055		sdd->seenfrom = B_TRUE;
1056		(void) strcpy(sdd->prevsnap, thissnap);
1057		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1058		zfs_close(zhp);
1059		return (0);
1060	}
1061
1062	if (sdd->seento || !sdd->seenfrom) {
1063		zfs_close(zhp);
1064		return (0);
1065	}
1066
1067	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1068	if (istosnap)
1069		sdd->seento = B_TRUE;
1070
1071	if (!sdd->doall && !isfromsnap && !istosnap) {
1072		if (sdd->replicate) {
1073			char *snapname;
1074			nvlist_t *snapprops;
1075			/*
1076			 * Filter out all intermediate snapshots except origin
1077			 * snapshots needed to replicate clones.
1078			 */
1079			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1080			    zhp->zfs_dmustats.dds_guid, &snapname);
1081
1082			VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1083			    "snapprops", &snapprops));
1084			VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1085			    thissnap, &snapprops));
1086			exclude = !nvlist_exists(snapprops, "is_clone_origin");
1087		} else {
1088			exclude = B_TRUE;
1089		}
1090	}
1091
1092	/*
1093	 * If a filter function exists, call it to determine whether
1094	 * this snapshot will be sent.
1095	 */
1096	if (exclude || (sdd->filter_cb != NULL &&
1097	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1098		/*
1099		 * This snapshot is filtered out.  Don't send it, and don't
1100		 * set prevsnap_obj, so it will be as if this snapshot didn't
1101		 * exist, and the next accepted snapshot will be sent as
1102		 * an incremental from the last accepted one, or as the
1103		 * first (and full) snapshot in the case of a replication,
1104		 * non-incremental send.
1105		 */
1106		zfs_close(zhp);
1107		return (0);
1108	}
1109
1110	gather_holds(zhp, sdd);
1111	fromorigin = sdd->prevsnap[0] == '\0' &&
1112	    (sdd->fromorigin || sdd->replicate);
1113
1114	if (sdd->verbose) {
1115		uint64_t size;
1116		err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1117		    fromorigin, &size);
1118
1119		if (sdd->parsable) {
1120			if (sdd->prevsnap[0] != '\0') {
1121				(void) fprintf(stderr, "incremental\t%s\t%s",
1122				    sdd->prevsnap, zhp->zfs_name);
1123			} else {
1124				(void) fprintf(stderr, "full\t%s",
1125				    zhp->zfs_name);
1126			}
1127		} else {
1128			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1129			    "send from @%s to %s"),
1130			    sdd->prevsnap, zhp->zfs_name);
1131		}
1132		if (err == 0) {
1133			if (sdd->parsable) {
1134				(void) fprintf(stderr, "\t%llu\n",
1135				    (longlong_t)size);
1136			} else {
1137				char buf[16];
1138				zfs_nicenum(size, buf, sizeof (buf));
1139				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1140				    " estimated size is %s\n"), buf);
1141			}
1142			sdd->size += size;
1143		} else {
1144			(void) fprintf(stderr, "\n");
1145		}
1146	}
1147
1148	if (!sdd->dryrun) {
1149		/*
1150		 * If progress reporting is requested, spawn a new thread to
1151		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1152		 */
1153		if (sdd->progress) {
1154			pa.pa_zhp = zhp;
1155			pa.pa_fd = sdd->outfd;
1156			pa.pa_parsable = sdd->parsable;
1157
1158			if (err = pthread_create(&tid, NULL,
1159			    send_progress_thread, &pa)) {
1160				zfs_close(zhp);
1161				return (err);
1162			}
1163		}
1164
1165		enum lzc_send_flags flags = 0;
1166		if (sdd->embed_data)
1167			flags |= LZC_SEND_FLAG_EMBED_DATA;
1168
1169		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1170		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1171
1172		if (sdd->progress) {
1173			(void) pthread_cancel(tid);
1174			(void) pthread_join(tid, NULL);
1175		}
1176	}
1177
1178	(void) strcpy(sdd->prevsnap, thissnap);
1179	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1180	zfs_close(zhp);
1181	return (err);
1182}
1183
1184static int
1185dump_filesystem(zfs_handle_t *zhp, void *arg)
1186{
1187	int rv = 0;
1188	send_dump_data_t *sdd = arg;
1189	boolean_t missingfrom = B_FALSE;
1190	zfs_cmd_t zc = { 0 };
1191
1192	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1193	    zhp->zfs_name, sdd->tosnap);
1194	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1195		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1196		    "WARNING: could not send %s@%s: does not exist\n"),
1197		    zhp->zfs_name, sdd->tosnap);
1198		sdd->err = B_TRUE;
1199		return (0);
1200	}
1201
1202	if (sdd->replicate && sdd->fromsnap) {
1203		/*
1204		 * If this fs does not have fromsnap, and we're doing
1205		 * recursive, we need to send a full stream from the
1206		 * beginning (or an incremental from the origin if this
1207		 * is a clone).  If we're doing non-recursive, then let
1208		 * them get the error.
1209		 */
1210		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1211		    zhp->zfs_name, sdd->fromsnap);
1212		if (ioctl(zhp->zfs_hdl->libzfs_fd,
1213		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1214			missingfrom = B_TRUE;
1215		}
1216	}
1217
1218	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1219	sdd->prevsnap_obj = 0;
1220	if (sdd->fromsnap == NULL || missingfrom)
1221		sdd->seenfrom = B_TRUE;
1222
1223	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1224	if (!sdd->seenfrom) {
1225		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1226		    "WARNING: could not send %s@%s:\n"
1227		    "incremental source (%s@%s) does not exist\n"),
1228		    zhp->zfs_name, sdd->tosnap,
1229		    zhp->zfs_name, sdd->fromsnap);
1230		sdd->err = B_TRUE;
1231	} else if (!sdd->seento) {
1232		if (sdd->fromsnap) {
1233			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1234			    "WARNING: could not send %s@%s:\n"
1235			    "incremental source (%s@%s) "
1236			    "is not earlier than it\n"),
1237			    zhp->zfs_name, sdd->tosnap,
1238			    zhp->zfs_name, sdd->fromsnap);
1239		} else {
1240			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1241			    "WARNING: "
1242			    "could not send %s@%s: does not exist\n"),
1243			    zhp->zfs_name, sdd->tosnap);
1244		}
1245		sdd->err = B_TRUE;
1246	}
1247
1248	return (rv);
1249}
1250
1251static int
1252dump_filesystems(zfs_handle_t *rzhp, void *arg)
1253{
1254	send_dump_data_t *sdd = arg;
1255	nvpair_t *fspair;
1256	boolean_t needagain, progress;
1257
1258	if (!sdd->replicate)
1259		return (dump_filesystem(rzhp, sdd));
1260
1261	/* Mark the clone origin snapshots. */
1262	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1263	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1264		nvlist_t *nvfs;
1265		uint64_t origin_guid = 0;
1266
1267		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1268		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1269		if (origin_guid != 0) {
1270			char *snapname;
1271			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1272			    origin_guid, &snapname);
1273			if (origin_nv != NULL) {
1274				nvlist_t *snapprops;
1275				VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1276				    "snapprops", &snapprops));
1277				VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1278				    snapname, &snapprops));
1279				VERIFY(0 == nvlist_add_boolean(
1280				    snapprops, "is_clone_origin"));
1281			}
1282		}
1283	}
1284again:
1285	needagain = progress = B_FALSE;
1286	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1287	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1288		nvlist_t *fslist, *parent_nv;
1289		char *fsname;
1290		zfs_handle_t *zhp;
1291		int err;
1292		uint64_t origin_guid = 0;
1293		uint64_t parent_guid = 0;
1294
1295		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1296		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1297			continue;
1298
1299		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1300		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1301		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1302		    &parent_guid);
1303
1304		if (parent_guid != 0) {
1305			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1306			if (!nvlist_exists(parent_nv, "sent")) {
1307				/* parent has not been sent; skip this one */
1308				needagain = B_TRUE;
1309				continue;
1310			}
1311		}
1312
1313		if (origin_guid != 0) {
1314			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1315			    origin_guid, NULL);
1316			if (origin_nv != NULL &&
1317			    !nvlist_exists(origin_nv, "sent")) {
1318				/*
1319				 * origin has not been sent yet;
1320				 * skip this clone.
1321				 */
1322				needagain = B_TRUE;
1323				continue;
1324			}
1325		}
1326
1327		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1328		if (zhp == NULL)
1329			return (-1);
1330		err = dump_filesystem(zhp, sdd);
1331		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1332		progress = B_TRUE;
1333		zfs_close(zhp);
1334		if (err)
1335			return (err);
1336	}
1337	if (needagain) {
1338		assert(progress);
1339		goto again;
1340	}
1341
1342	/* clean out the sent flags in case we reuse this fss */
1343	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1344	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1345		nvlist_t *fslist;
1346
1347		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1348		(void) nvlist_remove_all(fslist, "sent");
1349	}
1350
1351	return (0);
1352}
1353
1354/*
1355 * Generate a send stream for the dataset identified by the argument zhp.
1356 *
1357 * The content of the send stream is the snapshot identified by
1358 * 'tosnap'.  Incremental streams are requested in two ways:
1359 *     - from the snapshot identified by "fromsnap" (if non-null) or
1360 *     - from the origin of the dataset identified by zhp, which must
1361 *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
1362 *	 is TRUE.
1363 *
1364 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1365 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1366 * if "replicate" is set.  If "doall" is set, dump all the intermediate
1367 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1368 * case too. If "props" is set, send properties.
1369 */
1370int
1371zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1372    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1373    void *cb_arg, nvlist_t **debugnvp)
1374{
1375	char errbuf[1024];
1376	send_dump_data_t sdd = { 0 };
1377	int err = 0;
1378	nvlist_t *fss = NULL;
1379	avl_tree_t *fsavl = NULL;
1380	static uint64_t holdseq;
1381	int spa_version;
1382	pthread_t tid = 0;
1383	int pipefd[2];
1384	dedup_arg_t dda = { 0 };
1385	int featureflags = 0;
1386
1387	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1388	    "cannot send '%s'"), zhp->zfs_name);
1389
1390	if (fromsnap && fromsnap[0] == '\0') {
1391		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1392		    "zero-length incremental source"));
1393		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1394	}
1395
1396	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1397		uint64_t version;
1398		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1399		if (version >= ZPL_VERSION_SA) {
1400			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1401		}
1402	}
1403
1404	if (flags->dedup && !flags->dryrun) {
1405		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1406		    DMU_BACKUP_FEATURE_DEDUPPROPS);
1407		if (err = pipe(pipefd)) {
1408			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1409			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1410			    errbuf));
1411		}
1412		dda.outputfd = outfd;
1413		dda.inputfd = pipefd[1];
1414		dda.dedup_hdl = zhp->zfs_hdl;
1415		if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1416			(void) close(pipefd[0]);
1417			(void) close(pipefd[1]);
1418			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1419			return (zfs_error(zhp->zfs_hdl,
1420			    EZFS_THREADCREATEFAILED, errbuf));
1421		}
1422	}
1423
1424	if (flags->replicate || flags->doall || flags->props) {
1425		dmu_replay_record_t drr = { 0 };
1426		char *packbuf = NULL;
1427		size_t buflen = 0;
1428		zio_cksum_t zc = { 0 };
1429
1430		if (flags->replicate || flags->props) {
1431			nvlist_t *hdrnv;
1432
1433			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1434			if (fromsnap) {
1435				VERIFY(0 == nvlist_add_string(hdrnv,
1436				    "fromsnap", fromsnap));
1437			}
1438			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1439			if (!flags->replicate) {
1440				VERIFY(0 == nvlist_add_boolean(hdrnv,
1441				    "not_recursive"));
1442			}
1443
1444			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1445			    fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1446			if (err)
1447				goto err_out;
1448			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1449			err = nvlist_pack(hdrnv, &packbuf, &buflen,
1450			    NV_ENCODE_XDR, 0);
1451			if (debugnvp)
1452				*debugnvp = hdrnv;
1453			else
1454				nvlist_free(hdrnv);
1455			if (err)
1456				goto stderr_out;
1457		}
1458
1459		if (!flags->dryrun) {
1460			/* write first begin record */
1461			drr.drr_type = DRR_BEGIN;
1462			drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1463			DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1464			    drr_versioninfo, DMU_COMPOUNDSTREAM);
1465			DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1466			    drr_versioninfo, featureflags);
1467			(void) snprintf(drr.drr_u.drr_begin.drr_toname,
1468			    sizeof (drr.drr_u.drr_begin.drr_toname),
1469			    "%s@%s", zhp->zfs_name, tosnap);
1470			drr.drr_payloadlen = buflen;
1471			err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1472
1473			/* write header nvlist */
1474			if (err != -1 && packbuf != NULL) {
1475				err = cksum_and_write(packbuf, buflen, &zc,
1476				    outfd);
1477			}
1478			free(packbuf);
1479			if (err == -1) {
1480				err = errno;
1481				goto stderr_out;
1482			}
1483
1484			/* write end record */
1485			bzero(&drr, sizeof (drr));
1486			drr.drr_type = DRR_END;
1487			drr.drr_u.drr_end.drr_checksum = zc;
1488			err = write(outfd, &drr, sizeof (drr));
1489			if (err == -1) {
1490				err = errno;
1491				goto stderr_out;
1492			}
1493
1494			err = 0;
1495		}
1496	}
1497
1498	/* dump each stream */
1499	sdd.fromsnap = fromsnap;
1500	sdd.tosnap = tosnap;
1501	if (tid != 0)
1502		sdd.outfd = pipefd[0];
1503	else
1504		sdd.outfd = outfd;
1505	sdd.replicate = flags->replicate;
1506	sdd.doall = flags->doall;
1507	sdd.fromorigin = flags->fromorigin;
1508	sdd.fss = fss;
1509	sdd.fsavl = fsavl;
1510	sdd.verbose = flags->verbose;
1511	sdd.parsable = flags->parsable;
1512	sdd.progress = flags->progress;
1513	sdd.dryrun = flags->dryrun;
1514	sdd.embed_data = flags->embed_data;
1515	sdd.filter_cb = filter_func;
1516	sdd.filter_cb_arg = cb_arg;
1517	if (debugnvp)
1518		sdd.debugnv = *debugnvp;
1519
1520	/*
1521	 * Some flags require that we place user holds on the datasets that are
1522	 * being sent so they don't get destroyed during the send. We can skip
1523	 * this step if the pool is imported read-only since the datasets cannot
1524	 * be destroyed.
1525	 */
1526	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1527	    ZPOOL_PROP_READONLY, NULL) &&
1528	    zfs_spa_version(zhp, &spa_version) == 0 &&
1529	    spa_version >= SPA_VERSION_USERREFS &&
1530	    (flags->doall || flags->replicate)) {
1531		++holdseq;
1532		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1533		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1534		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1535		if (sdd.cleanup_fd < 0) {
1536			err = errno;
1537			goto stderr_out;
1538		}
1539		sdd.snapholds = fnvlist_alloc();
1540	} else {
1541		sdd.cleanup_fd = -1;
1542		sdd.snapholds = NULL;
1543	}
1544	if (flags->verbose || sdd.snapholds != NULL) {
1545		/*
1546		 * Do a verbose no-op dry run to get all the verbose output
1547		 * or to gather snapshot hold's before generating any data,
1548		 * then do a non-verbose real run to generate the streams.
1549		 */
1550		sdd.dryrun = B_TRUE;
1551		err = dump_filesystems(zhp, &sdd);
1552
1553		if (err != 0)
1554			goto stderr_out;
1555
1556		if (flags->verbose) {
1557			if (flags->parsable) {
1558				(void) fprintf(stderr, "size\t%llu\n",
1559				    (longlong_t)sdd.size);
1560			} else {
1561				char buf[16];
1562				zfs_nicenum(sdd.size, buf, sizeof (buf));
1563				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1564				    "total estimated size is %s\n"), buf);
1565			}
1566		}
1567
1568		/* Ensure no snaps found is treated as an error. */
1569		if (!sdd.seento) {
1570			err = ENOENT;
1571			goto err_out;
1572		}
1573
1574		/* Skip the second run if dryrun was requested. */
1575		if (flags->dryrun)
1576			goto err_out;
1577
1578		if (sdd.snapholds != NULL) {
1579			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1580			if (err != 0)
1581				goto stderr_out;
1582
1583			fnvlist_free(sdd.snapholds);
1584			sdd.snapholds = NULL;
1585		}
1586
1587		sdd.dryrun = B_FALSE;
1588		sdd.verbose = B_FALSE;
1589	}
1590
1591	err = dump_filesystems(zhp, &sdd);
1592	fsavl_destroy(fsavl);
1593	nvlist_free(fss);
1594
1595	/* Ensure no snaps found is treated as an error. */
1596	if (err == 0 && !sdd.seento)
1597		err = ENOENT;
1598
1599	if (tid != 0) {
1600		if (err != 0)
1601			(void) pthread_cancel(tid);
1602		(void) close(pipefd[0]);
1603		(void) pthread_join(tid, NULL);
1604	}
1605
1606	if (sdd.cleanup_fd != -1) {
1607		VERIFY(0 == close(sdd.cleanup_fd));
1608		sdd.cleanup_fd = -1;
1609	}
1610
1611	if (!flags->dryrun && (flags->replicate || flags->doall ||
1612	    flags->props)) {
1613		/*
1614		 * write final end record.  NB: want to do this even if
1615		 * there was some error, because it might not be totally
1616		 * failed.
1617		 */
1618		dmu_replay_record_t drr = { 0 };
1619		drr.drr_type = DRR_END;
1620		if (write(outfd, &drr, sizeof (drr)) == -1) {
1621			return (zfs_standard_error(zhp->zfs_hdl,
1622			    errno, errbuf));
1623		}
1624	}
1625
1626	return (err || sdd.err);
1627
1628stderr_out:
1629	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1630err_out:
1631	fsavl_destroy(fsavl);
1632	nvlist_free(fss);
1633	fnvlist_free(sdd.snapholds);
1634
1635	if (sdd.cleanup_fd != -1)
1636		VERIFY(0 == close(sdd.cleanup_fd));
1637	if (tid != 0) {
1638		(void) pthread_cancel(tid);
1639		(void) close(pipefd[0]);
1640		(void) pthread_join(tid, NULL);
1641	}
1642	return (err);
1643}
1644
1645int
1646zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
1647    enum lzc_send_flags flags)
1648{
1649	int err;
1650	libzfs_handle_t *hdl = zhp->zfs_hdl;
1651
1652	char errbuf[1024];
1653	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1654	    "warning: cannot send '%s'"), zhp->zfs_name);
1655
1656	err = lzc_send(zhp->zfs_name, from, fd, flags);
1657	if (err != 0) {
1658		switch (errno) {
1659		case EXDEV:
1660			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1661			    "not an earlier snapshot from the same fs"));
1662			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1663
1664		case ENOENT:
1665		case ESRCH:
1666			if (lzc_exists(zhp->zfs_name)) {
1667				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1668				    "incremental source (%s) does not exist"),
1669				    from);
1670			}
1671			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1672
1673		case EBUSY:
1674			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1675			    "target is busy; if a filesystem, "
1676			    "it must not be mounted"));
1677			return (zfs_error(hdl, EZFS_BUSY, errbuf));
1678
1679		case EDQUOT:
1680		case EFBIG:
1681		case EIO:
1682		case ENOLINK:
1683		case ENOSPC:
1684#ifdef illumos
1685		case ENOSTR:
1686#endif
1687		case ENXIO:
1688		case EPIPE:
1689		case ERANGE:
1690		case EFAULT:
1691		case EROFS:
1692			zfs_error_aux(hdl, strerror(errno));
1693			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1694
1695		default:
1696			return (zfs_standard_error(hdl, errno, errbuf));
1697		}
1698	}
1699	return (err != 0);
1700}
1701
1702/*
1703 * Routines specific to "zfs recv"
1704 */
1705
1706static int
1707recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1708    boolean_t byteswap, zio_cksum_t *zc)
1709{
1710	char *cp = buf;
1711	int rv;
1712	int len = ilen;
1713
1714	do {
1715		rv = read(fd, cp, len);
1716		cp += rv;
1717		len -= rv;
1718	} while (rv > 0);
1719
1720	if (rv < 0 || len != 0) {
1721		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1722		    "failed to read from stream"));
1723		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1724		    "cannot receive")));
1725	}
1726
1727	if (zc) {
1728		if (byteswap)
1729			fletcher_4_incremental_byteswap(buf, ilen, zc);
1730		else
1731			fletcher_4_incremental_native(buf, ilen, zc);
1732	}
1733	return (0);
1734}
1735
1736static int
1737recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1738    boolean_t byteswap, zio_cksum_t *zc)
1739{
1740	char *buf;
1741	int err;
1742
1743	buf = zfs_alloc(hdl, len);
1744	if (buf == NULL)
1745		return (ENOMEM);
1746
1747	err = recv_read(hdl, fd, buf, len, byteswap, zc);
1748	if (err != 0) {
1749		free(buf);
1750		return (err);
1751	}
1752
1753	err = nvlist_unpack(buf, len, nvp, 0);
1754	free(buf);
1755	if (err != 0) {
1756		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1757		    "stream (malformed nvlist)"));
1758		return (EINVAL);
1759	}
1760	return (0);
1761}
1762
1763static int
1764recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1765    int baselen, char *newname, recvflags_t *flags)
1766{
1767	static int seq;
1768	zfs_cmd_t zc = { 0 };
1769	int err;
1770	prop_changelist_t *clp;
1771	zfs_handle_t *zhp;
1772
1773	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1774	if (zhp == NULL)
1775		return (-1);
1776	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1777	    flags->force ? MS_FORCE : 0);
1778	zfs_close(zhp);
1779	if (clp == NULL)
1780		return (-1);
1781	err = changelist_prefix(clp);
1782	if (err)
1783		return (err);
1784
1785	zc.zc_objset_type = DMU_OST_ZFS;
1786	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1787
1788	if (tryname) {
1789		(void) strcpy(newname, tryname);
1790
1791		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1792
1793		if (flags->verbose) {
1794			(void) printf("attempting rename %s to %s\n",
1795			    zc.zc_name, zc.zc_value);
1796		}
1797		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1798		if (err == 0)
1799			changelist_rename(clp, name, tryname);
1800	} else {
1801		err = ENOENT;
1802	}
1803
1804	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
1805		seq++;
1806
1807		(void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
1808		    baselen, name, getpid(), seq);
1809		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1810
1811		if (flags->verbose) {
1812			(void) printf("failed - trying rename %s to %s\n",
1813			    zc.zc_name, zc.zc_value);
1814		}
1815		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1816		if (err == 0)
1817			changelist_rename(clp, name, newname);
1818		if (err && flags->verbose) {
1819			(void) printf("failed (%u) - "
1820			    "will try again on next pass\n", errno);
1821		}
1822		err = EAGAIN;
1823	} else if (flags->verbose) {
1824		if (err == 0)
1825			(void) printf("success\n");
1826		else
1827			(void) printf("failed (%u)\n", errno);
1828	}
1829
1830	(void) changelist_postfix(clp);
1831	changelist_free(clp);
1832
1833	return (err);
1834}
1835
1836static int
1837recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1838    char *newname, recvflags_t *flags)
1839{
1840	zfs_cmd_t zc = { 0 };
1841	int err = 0;
1842	prop_changelist_t *clp;
1843	zfs_handle_t *zhp;
1844	boolean_t defer = B_FALSE;
1845	int spa_version;
1846
1847	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1848	if (zhp == NULL)
1849		return (-1);
1850	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1851	    flags->force ? MS_FORCE : 0);
1852	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1853	    zfs_spa_version(zhp, &spa_version) == 0 &&
1854	    spa_version >= SPA_VERSION_USERREFS)
1855		defer = B_TRUE;
1856	zfs_close(zhp);
1857	if (clp == NULL)
1858		return (-1);
1859	err = changelist_prefix(clp);
1860	if (err)
1861		return (err);
1862
1863	zc.zc_objset_type = DMU_OST_ZFS;
1864	zc.zc_defer_destroy = defer;
1865	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1866
1867	if (flags->verbose)
1868		(void) printf("attempting destroy %s\n", zc.zc_name);
1869	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1870	if (err == 0) {
1871		if (flags->verbose)
1872			(void) printf("success\n");
1873		changelist_remove(clp, zc.zc_name);
1874	}
1875
1876	(void) changelist_postfix(clp);
1877	changelist_free(clp);
1878
1879	/*
1880	 * Deferred destroy might destroy the snapshot or only mark it to be
1881	 * destroyed later, and it returns success in either case.
1882	 */
1883	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1884	    ZFS_TYPE_SNAPSHOT))) {
1885		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1886	}
1887
1888	return (err);
1889}
1890
1891typedef struct guid_to_name_data {
1892	uint64_t guid;
1893	char *name;
1894	char *skip;
1895} guid_to_name_data_t;
1896
1897static int
1898guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1899{
1900	guid_to_name_data_t *gtnd = arg;
1901	int err;
1902
1903	if (gtnd->skip != NULL &&
1904	    strcmp(zhp->zfs_name, gtnd->skip) == 0) {
1905		return (0);
1906	}
1907
1908	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1909		(void) strcpy(gtnd->name, zhp->zfs_name);
1910		zfs_close(zhp);
1911		return (EEXIST);
1912	}
1913
1914	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1915	zfs_close(zhp);
1916	return (err);
1917}
1918
1919/*
1920 * Attempt to find the local dataset associated with this guid.  In the case of
1921 * multiple matches, we attempt to find the "best" match by searching
1922 * progressively larger portions of the hierarchy.  This allows one to send a
1923 * tree of datasets individually and guarantee that we will find the source
1924 * guid within that hierarchy, even if there are multiple matches elsewhere.
1925 */
1926static int
1927guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1928    char *name)
1929{
1930	/* exhaustive search all local snapshots */
1931	char pname[ZFS_MAXNAMELEN];
1932	guid_to_name_data_t gtnd;
1933	int err = 0;
1934	zfs_handle_t *zhp;
1935	char *cp;
1936
1937	gtnd.guid = guid;
1938	gtnd.name = name;
1939	gtnd.skip = NULL;
1940
1941	(void) strlcpy(pname, parent, sizeof (pname));
1942
1943	/*
1944	 * Search progressively larger portions of the hierarchy.  This will
1945	 * select the "most local" version of the origin snapshot in the case
1946	 * that there are multiple matching snapshots in the system.
1947	 */
1948	while ((cp = strrchr(pname, '/')) != NULL) {
1949
1950		/* Chop off the last component and open the parent */
1951		*cp = '\0';
1952		zhp = make_dataset_handle(hdl, pname);
1953
1954		if (zhp == NULL)
1955			continue;
1956
1957		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1958		zfs_close(zhp);
1959		if (err == EEXIST)
1960			return (0);
1961
1962		/*
1963		 * Remember the dataset that we already searched, so we
1964		 * skip it next time through.
1965		 */
1966		gtnd.skip = pname;
1967	}
1968
1969	return (ENOENT);
1970}
1971
1972/*
1973 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
1974 * guid1 is after guid2.
1975 */
1976static int
1977created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1978    uint64_t guid1, uint64_t guid2)
1979{
1980	nvlist_t *nvfs;
1981	char *fsname, *snapname;
1982	char buf[ZFS_MAXNAMELEN];
1983	int rv;
1984	zfs_handle_t *guid1hdl, *guid2hdl;
1985	uint64_t create1, create2;
1986
1987	if (guid2 == 0)
1988		return (0);
1989	if (guid1 == 0)
1990		return (1);
1991
1992	nvfs = fsavl_find(avl, guid1, &snapname);
1993	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1994	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1995	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1996	if (guid1hdl == NULL)
1997		return (-1);
1998
1999	nvfs = fsavl_find(avl, guid2, &snapname);
2000	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2001	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2002	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2003	if (guid2hdl == NULL) {
2004		zfs_close(guid1hdl);
2005		return (-1);
2006	}
2007
2008	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2009	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2010
2011	if (create1 < create2)
2012		rv = -1;
2013	else if (create1 > create2)
2014		rv = +1;
2015	else
2016		rv = 0;
2017
2018	zfs_close(guid1hdl);
2019	zfs_close(guid2hdl);
2020
2021	return (rv);
2022}
2023
2024static int
2025recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2026    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2027    nvlist_t *renamed)
2028{
2029	nvlist_t *local_nv, *deleted = NULL;
2030	avl_tree_t *local_avl;
2031	nvpair_t *fselem, *nextfselem;
2032	char *fromsnap;
2033	char newname[ZFS_MAXNAMELEN];
2034	char guidname[32];
2035	int error;
2036	boolean_t needagain, progress, recursive;
2037	char *s1, *s2;
2038
2039	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2040
2041	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2042	    ENOENT);
2043
2044	if (flags->dryrun)
2045		return (0);
2046
2047again:
2048	needagain = progress = B_FALSE;
2049
2050	VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2051
2052	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2053	    recursive, &local_nv, &local_avl)) != 0)
2054		return (error);
2055
2056	/*
2057	 * Process deletes and renames
2058	 */
2059	for (fselem = nvlist_next_nvpair(local_nv, NULL);
2060	    fselem; fselem = nextfselem) {
2061		nvlist_t *nvfs, *snaps;
2062		nvlist_t *stream_nvfs = NULL;
2063		nvpair_t *snapelem, *nextsnapelem;
2064		uint64_t fromguid = 0;
2065		uint64_t originguid = 0;
2066		uint64_t stream_originguid = 0;
2067		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2068		char *fsname, *stream_fsname;
2069
2070		nextfselem = nvlist_next_nvpair(local_nv, fselem);
2071
2072		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2073		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2074		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2075		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2076		    &parent_fromsnap_guid));
2077		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2078
2079		/*
2080		 * First find the stream's fs, so we can check for
2081		 * a different origin (due to "zfs promote")
2082		 */
2083		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2084		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2085			uint64_t thisguid;
2086
2087			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2088			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2089
2090			if (stream_nvfs != NULL)
2091				break;
2092		}
2093
2094		/* check for promote */
2095		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
2096		    &stream_originguid);
2097		if (stream_nvfs && originguid != stream_originguid) {
2098			switch (created_before(hdl, local_avl,
2099			    stream_originguid, originguid)) {
2100			case 1: {
2101				/* promote it! */
2102				zfs_cmd_t zc = { 0 };
2103				nvlist_t *origin_nvfs;
2104				char *origin_fsname;
2105
2106				if (flags->verbose)
2107					(void) printf("promoting %s\n", fsname);
2108
2109				origin_nvfs = fsavl_find(local_avl, originguid,
2110				    NULL);
2111				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2112				    "name", &origin_fsname));
2113				(void) strlcpy(zc.zc_value, origin_fsname,
2114				    sizeof (zc.zc_value));
2115				(void) strlcpy(zc.zc_name, fsname,
2116				    sizeof (zc.zc_name));
2117				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2118				if (error == 0)
2119					progress = B_TRUE;
2120				break;
2121			}
2122			default:
2123				break;
2124			case -1:
2125				fsavl_destroy(local_avl);
2126				nvlist_free(local_nv);
2127				return (-1);
2128			}
2129			/*
2130			 * We had/have the wrong origin, therefore our
2131			 * list of snapshots is wrong.  Need to handle
2132			 * them on the next pass.
2133			 */
2134			needagain = B_TRUE;
2135			continue;
2136		}
2137
2138		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2139		    snapelem; snapelem = nextsnapelem) {
2140			uint64_t thisguid;
2141			char *stream_snapname;
2142			nvlist_t *found, *props;
2143
2144			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2145
2146			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2147			found = fsavl_find(stream_avl, thisguid,
2148			    &stream_snapname);
2149
2150			/* check for delete */
2151			if (found == NULL) {
2152				char name[ZFS_MAXNAMELEN];
2153
2154				if (!flags->force)
2155					continue;
2156
2157				(void) snprintf(name, sizeof (name), "%s@%s",
2158				    fsname, nvpair_name(snapelem));
2159
2160				error = recv_destroy(hdl, name,
2161				    strlen(fsname)+1, newname, flags);
2162				if (error)
2163					needagain = B_TRUE;
2164				else
2165					progress = B_TRUE;
2166				sprintf(guidname, "%lu", thisguid);
2167				nvlist_add_boolean(deleted, guidname);
2168				continue;
2169			}
2170
2171			stream_nvfs = found;
2172
2173			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2174			    &props) && 0 == nvlist_lookup_nvlist(props,
2175			    stream_snapname, &props)) {
2176				zfs_cmd_t zc = { 0 };
2177
2178				zc.zc_cookie = B_TRUE; /* received */
2179				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2180				    "%s@%s", fsname, nvpair_name(snapelem));
2181				if (zcmd_write_src_nvlist(hdl, &zc,
2182				    props) == 0) {
2183					(void) zfs_ioctl(hdl,
2184					    ZFS_IOC_SET_PROP, &zc);
2185					zcmd_free_nvlists(&zc);
2186				}
2187			}
2188
2189			/* check for different snapname */
2190			if (strcmp(nvpair_name(snapelem),
2191			    stream_snapname) != 0) {
2192				char name[ZFS_MAXNAMELEN];
2193				char tryname[ZFS_MAXNAMELEN];
2194
2195				(void) snprintf(name, sizeof (name), "%s@%s",
2196				    fsname, nvpair_name(snapelem));
2197				(void) snprintf(tryname, sizeof (name), "%s@%s",
2198				    fsname, stream_snapname);
2199
2200				error = recv_rename(hdl, name, tryname,
2201				    strlen(fsname)+1, newname, flags);
2202				if (error)
2203					needagain = B_TRUE;
2204				else
2205					progress = B_TRUE;
2206			}
2207
2208			if (strcmp(stream_snapname, fromsnap) == 0)
2209				fromguid = thisguid;
2210		}
2211
2212		/* check for delete */
2213		if (stream_nvfs == NULL) {
2214			if (!flags->force)
2215				continue;
2216
2217			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2218			    newname, flags);
2219			if (error)
2220				needagain = B_TRUE;
2221			else
2222				progress = B_TRUE;
2223			sprintf(guidname, "%lu", parent_fromsnap_guid);
2224			nvlist_add_boolean(deleted, guidname);
2225			continue;
2226		}
2227
2228		if (fromguid == 0) {
2229			if (flags->verbose) {
2230				(void) printf("local fs %s does not have "
2231				    "fromsnap (%s in stream); must have "
2232				    "been deleted locally; ignoring\n",
2233				    fsname, fromsnap);
2234			}
2235			continue;
2236		}
2237
2238		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2239		    "name", &stream_fsname));
2240		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2241		    "parentfromsnap", &stream_parent_fromsnap_guid));
2242
2243		s1 = strrchr(fsname, '/');
2244		s2 = strrchr(stream_fsname, '/');
2245
2246		/*
2247		 * Check if we're going to rename based on parent guid change
2248		 * and the current parent guid was also deleted. If it was then
2249		 * rename will fail and is likely unneeded, so avoid this and
2250		 * force an early retry to determine the new
2251		 * parent_fromsnap_guid.
2252		 */
2253		if (stream_parent_fromsnap_guid != 0 &&
2254                    parent_fromsnap_guid != 0 &&
2255                    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2256			sprintf(guidname, "%lu", parent_fromsnap_guid);
2257			if (nvlist_exists(deleted, guidname)) {
2258				progress = B_TRUE;
2259				needagain = B_TRUE;
2260				goto doagain;
2261			}
2262		}
2263
2264		/*
2265		 * Check for rename. If the exact receive path is specified, it
2266		 * does not count as a rename, but we still need to check the
2267		 * datasets beneath it.
2268		 */
2269		if ((stream_parent_fromsnap_guid != 0 &&
2270		    parent_fromsnap_guid != 0 &&
2271		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2272		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2273		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2274			nvlist_t *parent;
2275			char tryname[ZFS_MAXNAMELEN];
2276
2277			parent = fsavl_find(local_avl,
2278			    stream_parent_fromsnap_guid, NULL);
2279			/*
2280			 * NB: parent might not be found if we used the
2281			 * tosnap for stream_parent_fromsnap_guid,
2282			 * because the parent is a newly-created fs;
2283			 * we'll be able to rename it after we recv the
2284			 * new fs.
2285			 */
2286			if (parent != NULL) {
2287				char *pname;
2288
2289				VERIFY(0 == nvlist_lookup_string(parent, "name",
2290				    &pname));
2291				(void) snprintf(tryname, sizeof (tryname),
2292				    "%s%s", pname, strrchr(stream_fsname, '/'));
2293			} else {
2294				tryname[0] = '\0';
2295				if (flags->verbose) {
2296					(void) printf("local fs %s new parent "
2297					    "not found\n", fsname);
2298				}
2299			}
2300
2301			newname[0] = '\0';
2302
2303			error = recv_rename(hdl, fsname, tryname,
2304			    strlen(tofs)+1, newname, flags);
2305
2306			if (renamed != NULL && newname[0] != '\0') {
2307				VERIFY(0 == nvlist_add_boolean(renamed,
2308				    newname));
2309			}
2310
2311			if (error)
2312				needagain = B_TRUE;
2313			else
2314				progress = B_TRUE;
2315		}
2316	}
2317
2318doagain:
2319	fsavl_destroy(local_avl);
2320	nvlist_free(local_nv);
2321	nvlist_free(deleted);
2322
2323	if (needagain && progress) {
2324		/* do another pass to fix up temporary names */
2325		if (flags->verbose)
2326			(void) printf("another pass:\n");
2327		goto again;
2328	}
2329
2330	return (needagain);
2331}
2332
2333static int
2334zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2335    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2336    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2337{
2338	nvlist_t *stream_nv = NULL;
2339	avl_tree_t *stream_avl = NULL;
2340	char *fromsnap = NULL;
2341	char *cp;
2342	char tofs[ZFS_MAXNAMELEN];
2343	char sendfs[ZFS_MAXNAMELEN];
2344	char errbuf[1024];
2345	dmu_replay_record_t drre;
2346	int error;
2347	boolean_t anyerr = B_FALSE;
2348	boolean_t softerr = B_FALSE;
2349	boolean_t recursive;
2350
2351	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2352	    "cannot receive"));
2353
2354	assert(drr->drr_type == DRR_BEGIN);
2355	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2356	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2357	    DMU_COMPOUNDSTREAM);
2358
2359	/*
2360	 * Read in the nvlist from the stream.
2361	 */
2362	if (drr->drr_payloadlen != 0) {
2363		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2364		    &stream_nv, flags->byteswap, zc);
2365		if (error) {
2366			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2367			goto out;
2368		}
2369	}
2370
2371	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2372	    ENOENT);
2373
2374	if (recursive && strchr(destname, '@')) {
2375		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2376		    "cannot specify snapshot name for multi-snapshot stream"));
2377		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2378		goto out;
2379	}
2380
2381	/*
2382	 * Read in the end record and verify checksum.
2383	 */
2384	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2385	    flags->byteswap, NULL)))
2386		goto out;
2387	if (flags->byteswap) {
2388		drre.drr_type = BSWAP_32(drre.drr_type);
2389		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2390		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2391		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2392		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2393		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2394		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2395		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2396		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2397	}
2398	if (drre.drr_type != DRR_END) {
2399		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2400		goto out;
2401	}
2402	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2403		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2404		    "incorrect header checksum"));
2405		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2406		goto out;
2407	}
2408
2409	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2410
2411	if (drr->drr_payloadlen != 0) {
2412		nvlist_t *stream_fss;
2413
2414		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2415		    &stream_fss));
2416		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2417			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2418			    "couldn't allocate avl tree"));
2419			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2420			goto out;
2421		}
2422
2423		if (fromsnap != NULL) {
2424			nvlist_t *renamed = NULL;
2425			nvpair_t *pair = NULL;
2426
2427			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2428			if (flags->isprefix) {
2429				struct drr_begin *drrb = &drr->drr_u.drr_begin;
2430				int i;
2431
2432				if (flags->istail) {
2433					cp = strrchr(drrb->drr_toname, '/');
2434					if (cp == NULL) {
2435						(void) strlcat(tofs, "/",
2436						    ZFS_MAXNAMELEN);
2437						i = 0;
2438					} else {
2439						i = (cp - drrb->drr_toname);
2440					}
2441				} else {
2442					i = strcspn(drrb->drr_toname, "/@");
2443				}
2444				/* zfs_receive_one() will create_parents() */
2445				(void) strlcat(tofs, &drrb->drr_toname[i],
2446				    ZFS_MAXNAMELEN);
2447				*strchr(tofs, '@') = '\0';
2448			}
2449
2450			if (recursive && !flags->dryrun && !flags->nomount) {
2451				VERIFY(0 == nvlist_alloc(&renamed,
2452				    NV_UNIQUE_NAME, 0));
2453			}
2454
2455			softerr = recv_incremental_replication(hdl, tofs, flags,
2456			    stream_nv, stream_avl, renamed);
2457
2458			/* Unmount renamed filesystems before receiving. */
2459			while ((pair = nvlist_next_nvpair(renamed,
2460			    pair)) != NULL) {
2461				zfs_handle_t *zhp;
2462				prop_changelist_t *clp = NULL;
2463
2464				zhp = zfs_open(hdl, nvpair_name(pair),
2465				    ZFS_TYPE_FILESYSTEM);
2466				if (zhp != NULL) {
2467					clp = changelist_gather(zhp,
2468					    ZFS_PROP_MOUNTPOINT, 0, 0);
2469					zfs_close(zhp);
2470					if (clp != NULL) {
2471						softerr |=
2472						    changelist_prefix(clp);
2473						changelist_free(clp);
2474					}
2475				}
2476			}
2477
2478			nvlist_free(renamed);
2479		}
2480	}
2481
2482	/*
2483	 * Get the fs specified by the first path in the stream (the top level
2484	 * specified by 'zfs send') and pass it to each invocation of
2485	 * zfs_receive_one().
2486	 */
2487	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2488	    ZFS_MAXNAMELEN);
2489	if ((cp = strchr(sendfs, '@')) != NULL)
2490		*cp = '\0';
2491
2492	/* Finally, receive each contained stream */
2493	do {
2494		/*
2495		 * we should figure out if it has a recoverable
2496		 * error, in which case do a recv_skip() and drive on.
2497		 * Note, if we fail due to already having this guid,
2498		 * zfs_receive_one() will take care of it (ie,
2499		 * recv_skip() and return 0).
2500		 */
2501		error = zfs_receive_impl(hdl, destname, flags, fd,
2502		    sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2503		    action_handlep);
2504		if (error == ENODATA) {
2505			error = 0;
2506			break;
2507		}
2508		anyerr |= error;
2509	} while (error == 0);
2510
2511	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2512		/*
2513		 * Now that we have the fs's they sent us, try the
2514		 * renames again.
2515		 */
2516		softerr = recv_incremental_replication(hdl, tofs, flags,
2517		    stream_nv, stream_avl, NULL);
2518	}
2519
2520out:
2521	fsavl_destroy(stream_avl);
2522	if (stream_nv)
2523		nvlist_free(stream_nv);
2524	if (softerr)
2525		error = -2;
2526	if (anyerr)
2527		error = -1;
2528	return (error);
2529}
2530
2531static void
2532trunc_prop_errs(int truncated)
2533{
2534	ASSERT(truncated != 0);
2535
2536	if (truncated == 1)
2537		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2538		    "1 more property could not be set\n"));
2539	else
2540		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2541		    "%d more properties could not be set\n"), truncated);
2542}
2543
2544static int
2545recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2546{
2547	dmu_replay_record_t *drr;
2548	void *buf = malloc(1<<20);
2549	char errbuf[1024];
2550
2551	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2552	    "cannot receive:"));
2553
2554	/* XXX would be great to use lseek if possible... */
2555	drr = buf;
2556
2557	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2558	    byteswap, NULL) == 0) {
2559		if (byteswap)
2560			drr->drr_type = BSWAP_32(drr->drr_type);
2561
2562		switch (drr->drr_type) {
2563		case DRR_BEGIN:
2564			/* NB: not to be used on v2 stream packages */
2565			if (drr->drr_payloadlen != 0) {
2566				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2567				    "invalid substream header"));
2568				return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2569			}
2570			break;
2571
2572		case DRR_END:
2573			free(buf);
2574			return (0);
2575
2576		case DRR_OBJECT:
2577			if (byteswap) {
2578				drr->drr_u.drr_object.drr_bonuslen =
2579				    BSWAP_32(drr->drr_u.drr_object.
2580				    drr_bonuslen);
2581			}
2582			(void) recv_read(hdl, fd, buf,
2583			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2584			    B_FALSE, NULL);
2585			break;
2586
2587		case DRR_WRITE:
2588			if (byteswap) {
2589				drr->drr_u.drr_write.drr_length =
2590				    BSWAP_64(drr->drr_u.drr_write.drr_length);
2591			}
2592			(void) recv_read(hdl, fd, buf,
2593			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2594			break;
2595		case DRR_SPILL:
2596			if (byteswap) {
2597				drr->drr_u.drr_write.drr_length =
2598				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
2599			}
2600			(void) recv_read(hdl, fd, buf,
2601			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2602			break;
2603		case DRR_WRITE_EMBEDDED:
2604			if (byteswap) {
2605				drr->drr_u.drr_write_embedded.drr_psize =
2606				    BSWAP_32(drr->drr_u.drr_write_embedded.
2607				    drr_psize);
2608			}
2609			(void) recv_read(hdl, fd, buf,
2610			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
2611			    8), B_FALSE, NULL);
2612			break;
2613		case DRR_WRITE_BYREF:
2614		case DRR_FREEOBJECTS:
2615		case DRR_FREE:
2616			break;
2617
2618		default:
2619			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2620			    "invalid record type"));
2621			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2622		}
2623	}
2624
2625	free(buf);
2626	return (-1);
2627}
2628
2629/*
2630 * Restores a backup of tosnap from the file descriptor specified by infd.
2631 */
2632static int
2633zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2634    recvflags_t *flags, dmu_replay_record_t *drr,
2635    dmu_replay_record_t *drr_noswap, const char *sendfs,
2636    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2637    uint64_t *action_handlep)
2638{
2639	zfs_cmd_t zc = { 0 };
2640	time_t begin_time;
2641	int ioctl_err, ioctl_errno, err;
2642	char *cp;
2643	struct drr_begin *drrb = &drr->drr_u.drr_begin;
2644	char errbuf[1024];
2645	char prop_errbuf[1024];
2646	const char *chopprefix;
2647	boolean_t newfs = B_FALSE;
2648	boolean_t stream_wantsnewfs;
2649	uint64_t parent_snapguid = 0;
2650	prop_changelist_t *clp = NULL;
2651	nvlist_t *snapprops_nvlist = NULL;
2652	zprop_errflags_t prop_errflags;
2653	boolean_t recursive;
2654
2655	begin_time = time(NULL);
2656
2657	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2658	    "cannot receive"));
2659
2660	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2661	    ENOENT);
2662
2663	if (stream_avl != NULL) {
2664		char *snapname;
2665		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2666		    &snapname);
2667		nvlist_t *props;
2668		int ret;
2669
2670		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
2671		    &parent_snapguid);
2672		err = nvlist_lookup_nvlist(fs, "props", &props);
2673		if (err)
2674			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2675
2676		if (flags->canmountoff) {
2677			VERIFY(0 == nvlist_add_uint64(props,
2678			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2679		}
2680		ret = zcmd_write_src_nvlist(hdl, &zc, props);
2681		if (err)
2682			nvlist_free(props);
2683
2684		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2685			VERIFY(0 == nvlist_lookup_nvlist(props,
2686			    snapname, &snapprops_nvlist));
2687		}
2688
2689		if (ret != 0)
2690			return (-1);
2691	}
2692
2693	cp = NULL;
2694
2695	/*
2696	 * Determine how much of the snapshot name stored in the stream
2697	 * we are going to tack on to the name they specified on the
2698	 * command line, and how much we are going to chop off.
2699	 *
2700	 * If they specified a snapshot, chop the entire name stored in
2701	 * the stream.
2702	 */
2703	if (flags->istail) {
2704		/*
2705		 * A filesystem was specified with -e. We want to tack on only
2706		 * the tail of the sent snapshot path.
2707		 */
2708		if (strchr(tosnap, '@')) {
2709			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2710			    "argument - snapshot not allowed with -e"));
2711			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2712		}
2713
2714		chopprefix = strrchr(sendfs, '/');
2715
2716		if (chopprefix == NULL) {
2717			/*
2718			 * The tail is the poolname, so we need to
2719			 * prepend a path separator.
2720			 */
2721			int len = strlen(drrb->drr_toname);
2722			cp = malloc(len + 2);
2723			cp[0] = '/';
2724			(void) strcpy(&cp[1], drrb->drr_toname);
2725			chopprefix = cp;
2726		} else {
2727			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2728		}
2729	} else if (flags->isprefix) {
2730		/*
2731		 * A filesystem was specified with -d. We want to tack on
2732		 * everything but the first element of the sent snapshot path
2733		 * (all but the pool name).
2734		 */
2735		if (strchr(tosnap, '@')) {
2736			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2737			    "argument - snapshot not allowed with -d"));
2738			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2739		}
2740
2741		chopprefix = strchr(drrb->drr_toname, '/');
2742		if (chopprefix == NULL)
2743			chopprefix = strchr(drrb->drr_toname, '@');
2744	} else if (strchr(tosnap, '@') == NULL) {
2745		/*
2746		 * If a filesystem was specified without -d or -e, we want to
2747		 * tack on everything after the fs specified by 'zfs send'.
2748		 */
2749		chopprefix = drrb->drr_toname + strlen(sendfs);
2750	} else {
2751		/* A snapshot was specified as an exact path (no -d or -e). */
2752		if (recursive) {
2753			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2754			    "cannot specify snapshot name for multi-snapshot "
2755			    "stream"));
2756			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2757		}
2758		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2759	}
2760
2761	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2762	ASSERT(chopprefix > drrb->drr_toname);
2763	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2764	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2765	    chopprefix[0] == '\0');
2766
2767	/*
2768	 * Determine name of destination snapshot, store in zc_value.
2769	 */
2770	(void) strcpy(zc.zc_value, tosnap);
2771	(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2772#ifdef __FreeBSD__
2773	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
2774		zfs_ioctl_version = get_zfs_ioctl_version();
2775	/*
2776	 * For forward compatibility hide tosnap in zc_value
2777	 */
2778	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
2779		(void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
2780#endif
2781	free(cp);
2782	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2783		zcmd_free_nvlists(&zc);
2784		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2785	}
2786
2787	/*
2788	 * Determine the name of the origin snapshot, store in zc_string.
2789	 */
2790	if (drrb->drr_flags & DRR_FLAG_CLONE) {
2791		if (guid_to_name(hdl, zc.zc_value,
2792		    drrb->drr_fromguid, zc.zc_string) != 0) {
2793			zcmd_free_nvlists(&zc);
2794			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2795			    "local origin for clone %s does not exist"),
2796			    zc.zc_value);
2797			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2798		}
2799		if (flags->verbose)
2800			(void) printf("found clone origin %s\n", zc.zc_string);
2801	}
2802
2803	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
2804	    (drrb->drr_flags & DRR_FLAG_CLONE));
2805
2806	if (stream_wantsnewfs) {
2807		/*
2808		 * if the parent fs does not exist, look for it based on
2809		 * the parent snap GUID
2810		 */
2811		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2812		    "cannot receive new filesystem stream"));
2813
2814		(void) strcpy(zc.zc_name, zc.zc_value);
2815		cp = strrchr(zc.zc_name, '/');
2816		if (cp)
2817			*cp = '\0';
2818		if (cp &&
2819		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2820			char suffix[ZFS_MAXNAMELEN];
2821			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2822			if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
2823			    zc.zc_value) == 0) {
2824				*strchr(zc.zc_value, '@') = '\0';
2825				(void) strcat(zc.zc_value, suffix);
2826			}
2827		}
2828	} else {
2829		/*
2830		 * if the fs does not exist, look for it based on the
2831		 * fromsnap GUID
2832		 */
2833		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2834		    "cannot receive incremental stream"));
2835
2836		(void) strcpy(zc.zc_name, zc.zc_value);
2837		*strchr(zc.zc_name, '@') = '\0';
2838
2839		/*
2840		 * If the exact receive path was specified and this is the
2841		 * topmost path in the stream, then if the fs does not exist we
2842		 * should look no further.
2843		 */
2844		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
2845		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2846		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2847			char snap[ZFS_MAXNAMELEN];
2848			(void) strcpy(snap, strchr(zc.zc_value, '@'));
2849			if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
2850			    zc.zc_value) == 0) {
2851				*strchr(zc.zc_value, '@') = '\0';
2852				(void) strcat(zc.zc_value, snap);
2853			}
2854		}
2855	}
2856
2857	(void) strcpy(zc.zc_name, zc.zc_value);
2858	*strchr(zc.zc_name, '@') = '\0';
2859
2860	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2861		zfs_handle_t *zhp;
2862
2863		/*
2864		 * Destination fs exists.  Therefore this should either
2865		 * be an incremental, or the stream specifies a new fs
2866		 * (full stream or clone) and they want us to blow it
2867		 * away (and have therefore specified -F and removed any
2868		 * snapshots).
2869		 */
2870		if (stream_wantsnewfs) {
2871			if (!flags->force) {
2872				zcmd_free_nvlists(&zc);
2873				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2874				    "destination '%s' exists\n"
2875				    "must specify -F to overwrite it"),
2876				    zc.zc_name);
2877				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2878			}
2879			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2880			    &zc) == 0) {
2881				zcmd_free_nvlists(&zc);
2882				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2883				    "destination has snapshots (eg. %s)\n"
2884				    "must destroy them to overwrite it"),
2885				    zc.zc_name);
2886				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2887			}
2888		}
2889
2890		if ((zhp = zfs_open(hdl, zc.zc_name,
2891		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2892			zcmd_free_nvlists(&zc);
2893			return (-1);
2894		}
2895
2896		if (stream_wantsnewfs &&
2897		    zhp->zfs_dmustats.dds_origin[0]) {
2898			zcmd_free_nvlists(&zc);
2899			zfs_close(zhp);
2900			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2901			    "destination '%s' is a clone\n"
2902			    "must destroy it to overwrite it"),
2903			    zc.zc_name);
2904			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2905		}
2906
2907		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2908		    stream_wantsnewfs) {
2909			/* We can't do online recv in this case */
2910			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2911			if (clp == NULL) {
2912				zfs_close(zhp);
2913				zcmd_free_nvlists(&zc);
2914				return (-1);
2915			}
2916			if (changelist_prefix(clp) != 0) {
2917				changelist_free(clp);
2918				zfs_close(zhp);
2919				zcmd_free_nvlists(&zc);
2920				return (-1);
2921			}
2922		}
2923		zfs_close(zhp);
2924	} else {
2925		/*
2926		 * Destination filesystem does not exist.  Therefore we better
2927		 * be creating a new filesystem (either from a full backup, or
2928		 * a clone).  It would therefore be invalid if the user
2929		 * specified only the pool name (i.e. if the destination name
2930		 * contained no slash character).
2931		 */
2932		if (!stream_wantsnewfs ||
2933		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
2934			zcmd_free_nvlists(&zc);
2935			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2936			    "destination '%s' does not exist"), zc.zc_name);
2937			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2938		}
2939
2940		/*
2941		 * Trim off the final dataset component so we perform the
2942		 * recvbackup ioctl to the filesystems's parent.
2943		 */
2944		*cp = '\0';
2945
2946		if (flags->isprefix && !flags->istail && !flags->dryrun &&
2947		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2948			zcmd_free_nvlists(&zc);
2949			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2950		}
2951
2952		newfs = B_TRUE;
2953	}
2954
2955	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2956	zc.zc_cookie = infd;
2957	zc.zc_guid = flags->force;
2958	if (flags->verbose) {
2959		(void) printf("%s %s stream of %s into %s\n",
2960		    flags->dryrun ? "would receive" : "receiving",
2961		    drrb->drr_fromguid ? "incremental" : "full",
2962		    drrb->drr_toname, zc.zc_value);
2963		(void) fflush(stdout);
2964	}
2965
2966	if (flags->dryrun) {
2967		zcmd_free_nvlists(&zc);
2968		return (recv_skip(hdl, infd, flags->byteswap));
2969	}
2970
2971	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2972	zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2973	zc.zc_cleanup_fd = cleanup_fd;
2974	zc.zc_action_handle = *action_handlep;
2975
2976	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2977	ioctl_errno = errno;
2978	prop_errflags = (zprop_errflags_t)zc.zc_obj;
2979
2980	if (err == 0) {
2981		nvlist_t *prop_errors;
2982		VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2983		    zc.zc_nvlist_dst_size, &prop_errors, 0));
2984
2985		nvpair_t *prop_err = NULL;
2986
2987		while ((prop_err = nvlist_next_nvpair(prop_errors,
2988		    prop_err)) != NULL) {
2989			char tbuf[1024];
2990			zfs_prop_t prop;
2991			int intval;
2992
2993			prop = zfs_name_to_prop(nvpair_name(prop_err));
2994			(void) nvpair_value_int32(prop_err, &intval);
2995			if (strcmp(nvpair_name(prop_err),
2996			    ZPROP_N_MORE_ERRORS) == 0) {
2997				trunc_prop_errs(intval);
2998				break;
2999			} else {
3000				(void) snprintf(tbuf, sizeof (tbuf),
3001				    dgettext(TEXT_DOMAIN,
3002				    "cannot receive %s property on %s"),
3003				    nvpair_name(prop_err), zc.zc_name);
3004				zfs_setprop_error(hdl, prop, intval, tbuf);
3005			}
3006		}
3007		nvlist_free(prop_errors);
3008	}
3009
3010	zc.zc_nvlist_dst = 0;
3011	zc.zc_nvlist_dst_size = 0;
3012	zcmd_free_nvlists(&zc);
3013
3014	if (err == 0 && snapprops_nvlist) {
3015		zfs_cmd_t zc2 = { 0 };
3016
3017		(void) strcpy(zc2.zc_name, zc.zc_value);
3018		zc2.zc_cookie = B_TRUE; /* received */
3019		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
3020			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
3021			zcmd_free_nvlists(&zc2);
3022		}
3023	}
3024
3025	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3026		/*
3027		 * It may be that this snapshot already exists,
3028		 * in which case we want to consume & ignore it
3029		 * rather than failing.
3030		 */
3031		avl_tree_t *local_avl;
3032		nvlist_t *local_nv, *fs;
3033		cp = strchr(zc.zc_value, '@');
3034
3035		/*
3036		 * XXX Do this faster by just iterating over snaps in
3037		 * this fs.  Also if zc_value does not exist, we will
3038		 * get a strange "does not exist" error message.
3039		 */
3040		*cp = '\0';
3041		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
3042		    &local_nv, &local_avl) == 0) {
3043			*cp = '@';
3044			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3045			fsavl_destroy(local_avl);
3046			nvlist_free(local_nv);
3047
3048			if (fs != NULL) {
3049				if (flags->verbose) {
3050					(void) printf("snap %s already exists; "
3051					    "ignoring\n", zc.zc_value);
3052				}
3053				err = ioctl_err = recv_skip(hdl, infd,
3054				    flags->byteswap);
3055			}
3056		}
3057		*cp = '@';
3058	}
3059
3060	if (ioctl_err != 0) {
3061		switch (ioctl_errno) {
3062		case ENODEV:
3063			cp = strchr(zc.zc_value, '@');
3064			*cp = '\0';
3065			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3066			    "most recent snapshot of %s does not\n"
3067			    "match incremental source"), zc.zc_value);
3068			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3069			*cp = '@';
3070			break;
3071		case ETXTBSY:
3072			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3073			    "destination %s has been modified\n"
3074			    "since most recent snapshot"), zc.zc_name);
3075			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3076			break;
3077		case EEXIST:
3078			cp = strchr(zc.zc_value, '@');
3079			if (newfs) {
3080				/* it's the containing fs that exists */
3081				*cp = '\0';
3082			}
3083			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3084			    "destination already exists"));
3085			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
3086			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3087			    zc.zc_value);
3088			*cp = '@';
3089			break;
3090		case EINVAL:
3091			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3092			break;
3093		case ECKSUM:
3094			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3095			    "invalid stream (checksum mismatch)"));
3096			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3097			break;
3098		case ENOTSUP:
3099			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3100			    "pool must be upgraded to receive this stream."));
3101			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3102			break;
3103		case EDQUOT:
3104			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3105			    "destination %s space quota exceeded"), zc.zc_name);
3106			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3107			break;
3108		default:
3109			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3110		}
3111	}
3112
3113	/*
3114	 * Mount the target filesystem (if created).  Also mount any
3115	 * children of the target filesystem if we did a replication
3116	 * receive (indicated by stream_avl being non-NULL).
3117	 */
3118	cp = strchr(zc.zc_value, '@');
3119	if (cp && (ioctl_err == 0 || !newfs)) {
3120		zfs_handle_t *h;
3121
3122		*cp = '\0';
3123		h = zfs_open(hdl, zc.zc_value,
3124		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3125		if (h != NULL) {
3126			if (h->zfs_type == ZFS_TYPE_VOLUME) {
3127				*cp = '@';
3128			} else if (newfs || stream_avl) {
3129				/*
3130				 * Track the first/top of hierarchy fs,
3131				 * for mounting and sharing later.
3132				 */
3133				if (top_zfs && *top_zfs == NULL)
3134					*top_zfs = zfs_strdup(hdl, zc.zc_value);
3135			}
3136			zfs_close(h);
3137		}
3138		*cp = '@';
3139	}
3140
3141	if (clp) {
3142		err |= changelist_postfix(clp);
3143		changelist_free(clp);
3144	}
3145
3146	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3147		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3148		    "failed to clear unreceived properties on %s"),
3149		    zc.zc_name);
3150		(void) fprintf(stderr, "\n");
3151	}
3152	if (prop_errflags & ZPROP_ERR_NORESTORE) {
3153		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3154		    "failed to restore original properties on %s"),
3155		    zc.zc_name);
3156		(void) fprintf(stderr, "\n");
3157	}
3158
3159	if (err || ioctl_err)
3160		return (-1);
3161
3162	*action_handlep = zc.zc_action_handle;
3163
3164	if (flags->verbose) {
3165		char buf1[64];
3166		char buf2[64];
3167		uint64_t bytes = zc.zc_cookie;
3168		time_t delta = time(NULL) - begin_time;
3169		if (delta == 0)
3170			delta = 1;
3171		zfs_nicenum(bytes, buf1, sizeof (buf1));
3172		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3173
3174		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3175		    buf1, delta, buf2);
3176	}
3177
3178	return (0);
3179}
3180
3181static int
3182zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3183    int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3184    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
3185{
3186	int err;
3187	dmu_replay_record_t drr, drr_noswap;
3188	struct drr_begin *drrb = &drr.drr_u.drr_begin;
3189	char errbuf[1024];
3190	zio_cksum_t zcksum = { 0 };
3191	uint64_t featureflags;
3192	int hdrtype;
3193
3194	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3195	    "cannot receive"));
3196
3197	if (flags->isprefix &&
3198	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3199		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3200		    "(%s) does not exist"), tosnap);
3201		return (zfs_error(hdl, EZFS_NOENT, errbuf));
3202	}
3203
3204	/* read in the BEGIN record */
3205	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3206	    &zcksum)))
3207		return (err);
3208
3209	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3210		/* It's the double end record at the end of a package */
3211		return (ENODATA);
3212	}
3213
3214	/* the kernel needs the non-byteswapped begin record */
3215	drr_noswap = drr;
3216
3217	flags->byteswap = B_FALSE;
3218	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3219		/*
3220		 * We computed the checksum in the wrong byteorder in
3221		 * recv_read() above; do it again correctly.
3222		 */
3223		bzero(&zcksum, sizeof (zio_cksum_t));
3224		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3225		flags->byteswap = B_TRUE;
3226
3227		drr.drr_type = BSWAP_32(drr.drr_type);
3228		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3229		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3230		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3231		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3232		drrb->drr_type = BSWAP_32(drrb->drr_type);
3233		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3234		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3235		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3236	}
3237
3238	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3239		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3240		    "stream (bad magic number)"));
3241		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3242	}
3243
3244	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3245	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3246
3247	if (!DMU_STREAM_SUPPORTED(featureflags) ||
3248	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3249		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3250		    "stream has unsupported feature, feature flags = %lx"),
3251		    featureflags);
3252		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3253	}
3254
3255	if (strchr(drrb->drr_toname, '@') == NULL) {
3256		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3257		    "stream (bad snapshot name)"));
3258		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3259	}
3260
3261	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3262		char nonpackage_sendfs[ZFS_MAXNAMELEN];
3263		if (sendfs == NULL) {
3264			/*
3265			 * We were not called from zfs_receive_package(). Get
3266			 * the fs specified by 'zfs send'.
3267			 */
3268			char *cp;
3269			(void) strlcpy(nonpackage_sendfs,
3270			    drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
3271			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3272				*cp = '\0';
3273			sendfs = nonpackage_sendfs;
3274		}
3275		return (zfs_receive_one(hdl, infd, tosnap, flags,
3276		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
3277		    top_zfs, cleanup_fd, action_handlep));
3278	} else {
3279		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3280		    DMU_COMPOUNDSTREAM);
3281		return (zfs_receive_package(hdl, infd, tosnap, flags,
3282		    &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
3283	}
3284}
3285
3286/*
3287 * Restores a backup of tosnap from the file descriptor specified by infd.
3288 * Return 0 on total success, -2 if some things couldn't be
3289 * destroyed/renamed/promoted, -1 if some things couldn't be received.
3290 * (-1 will override -2).
3291 */
3292int
3293zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3294    int infd, avl_tree_t *stream_avl)
3295{
3296	char *top_zfs = NULL;
3297	int err;
3298	int cleanup_fd;
3299	uint64_t action_handle = 0;
3300
3301	cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
3302	VERIFY(cleanup_fd >= 0);
3303
3304	err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
3305	    stream_avl, &top_zfs, cleanup_fd, &action_handle);
3306
3307	VERIFY(0 == close(cleanup_fd));
3308
3309	if (err == 0 && !flags->nomount && top_zfs) {
3310		zfs_handle_t *zhp;
3311		prop_changelist_t *clp;
3312
3313		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3314		if (zhp != NULL) {
3315			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3316			    CL_GATHER_MOUNT_ALWAYS, 0);
3317			zfs_close(zhp);
3318			if (clp != NULL) {
3319				/* mount and share received datasets */
3320				err = changelist_postfix(clp);
3321				changelist_free(clp);
3322			}
3323		}
3324		if (zhp == NULL || clp == NULL || err)
3325			err = -1;
3326	}
3327	if (top_zfs)
3328		free(top_zfs);
3329
3330	return (err);
3331}
3332