libzfs_sendrecv.c revision 276081
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved.
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 */
30
31#include <assert.h>
32#include <ctype.h>
33#include <errno.h>
34#include <libintl.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <strings.h>
38#include <unistd.h>
39#include <stddef.h>
40#include <fcntl.h>
41#include <sys/param.h>
42#include <sys/mount.h>
43#include <pthread.h>
44#include <umem.h>
45#include <time.h>
46
47#include <libzfs.h>
48#include <libzfs_core.h>
49
50#include "zfs_namecheck.h"
51#include "zfs_prop.h"
52#include "zfs_fletcher.h"
53#include "libzfs_impl.h"
54#include <sha2.h>
55#include <sys/zio_checksum.h>
56#include <sys/ddt.h>
57
#ifdef __FreeBSD__
/* Defined outside this file; only declared when building for FreeBSD. */
extern int zfs_ioctl_version;
#endif

/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/*
 * We need to use something for ENODATA (presumably it is missing from this
 * platform's <errno.h> -- TODO confirm), so alias it to EIDRM.
 */
#define	ENODATA	EIDRM

/* Forward declaration of a static function defined elsewhere in this file. */
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
    int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);

/*
 * All-zero checksum.  cksummer() compares a DRR_WRITE record's checksum
 * against it to decide whether a SHA256 checksum must be computed.
 */
static const zio_cksum_t zero_cksum = { 0 };
71
/*
 * Argument block handed to the cksummer() thread.
 */
typedef struct dedup_arg {
	/* descriptor cksummer() wraps with fdopen() and reads records from */
	int	inputfd;
	/* descriptor cksummer() writes the rewritten stream to */
	int	outputfd;
	/* library handle used for buffer allocation and dedup-table notices */
	libzfs_handle_t  *dedup_hdl;
} dedup_arg_t;
77
/*
 * Argument block handed to send_progress_thread().
 */
typedef struct progress_arg {
	/* snapshot whose send progress is being reported */
	zfs_handle_t *pa_zhp;
	/* descriptor passed to ZFS_IOC_SEND_PROGRESS via zc_cookie */
	int pa_fd;
	/* B_TRUE: tab-separated output; B_FALSE: aligned columns + header */
	boolean_t pa_parsable;
} progress_arg_t;
83
/*
 * Identifies where a block was first seen in the stream.  cksummer() fills
 * these from a DRR_WRITE record's drr_toguid, drr_object and drr_offset, and
 * copies them into the drr_ref* fields of a DRR_WRITE_BYREF record when the
 * same block shows up again.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
89
/*
 * One entry of the dedup table: a node in a singly linked hash-bucket chain.
 * ddt_update() treats two entries as equal when both dde_chksum and dde_prop
 * match.
 */
typedef struct dedup_entry {
	struct dedup_entry	*dde_next;	/* next entry in the same bucket */
	zio_cksum_t dde_chksum;		/* checksum of the block */
	uint64_t dde_prop;		/* ddk_prop word of the block's key */
	dataref_t dde_ref;		/* where the block was first seen */
} dedup_entry_t;
96
/*
 * cksummer() sizes the dedup table as the larger of
 * MAX_DDT_PHYSMEM_PERCENT percent of physical memory and
 * SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define	MAX_DDT_PHYSMEM_PERCENT		20
#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128

/*
 * Chained hash table of dedup_entry_t, indexed by the low numhashbits bits
 * of the first checksum word (see ddt_update()).
 */
typedef struct dedup_table {
	dedup_entry_t	**dedup_hash_array;	/* bucket heads */
	umem_cache_t	*ddecache;		/* allocator for entries */
	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
	uint64_t	ddt_count;	/* bumped for every entry appended */
	int		numhashbits;	/* number of checksum bits in the index */
	boolean_t	ddt_full;	/* set once the "table full" note is issued */
} dedup_table_t;
109
/*
 * Return the 1-based position of the most significant set bit in n, that
 * is, how many right shifts it takes to reduce n to zero.  Returns 0 for
 * n == 0.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		n >>= 1;
		nbits++;
	}
	return (nbits);
}
119
/*
 * All-or-nothing read: fetch one item of exactly len bytes from stream
 * into buf.  Returns 1 when the whole item was read and 0 otherwise
 * (end of file, error, or a short read).
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
130
131static void
132ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
133    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
134{
135	dedup_entry_t	*dde;
136
137	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
138		if (ddt->ddt_full == B_FALSE) {
139			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
140			    "Dedup table full.  Deduplication will continue "
141			    "with existing table entries"));
142			ddt->ddt_full = B_TRUE;
143		}
144		return;
145	}
146
147	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
148	    != NULL) {
149		assert(*ddepp == NULL);
150		dde->dde_next = NULL;
151		dde->dde_chksum = *cs;
152		dde->dde_prop = prop;
153		dde->dde_ref = *dr;
154		*ddepp = dde;
155		ddt->cur_ddt_size += sizeof (dedup_entry_t);
156		ddt->ddt_count++;
157	}
158}
159
160/*
161 * Using the specified dedup table, do a lookup for an entry with
162 * the checksum cs.  If found, return the block's reference info
163 * in *dr. Otherwise, insert a new entry in the dedup table, using
164 * the reference information specified by *dr.
165 *
166 * return value:  true - entry was found
167 *		  false - entry was not found
168 */
169static boolean_t
170ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
171    uint64_t prop, dataref_t *dr)
172{
173	uint32_t hashcode;
174	dedup_entry_t **ddepp;
175
176	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
177
178	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
179	    ddepp = &((*ddepp)->dde_next)) {
180		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
181		    (*ddepp)->dde_prop == prop) {
182			*dr = (*ddepp)->dde_ref;
183			return (B_TRUE);
184		}
185	}
186	ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
187	return (B_FALSE);
188}
189
190static int
191cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
192{
193	fletcher_4_incremental_native(buf, len, zc);
194	return (write(outfd, buf, len));
195}
196
197/*
198 * This function is started in a separate thread when the dedup option
199 * has been requested.  The main send thread determines the list of
200 * snapshots to be included in the send stream and makes the ioctl calls
201 * for each one.  But instead of having the ioctl send the output to the
202 * the output fd specified by the caller of zfs_send()), the
203 * ioctl is told to direct the output to a pipe, which is read by the
204 * alternate thread running THIS function.  This function does the
205 * dedup'ing by:
206 *  1. building a dedup table (the DDT)
207 *  2. doing checksums on each data block and inserting a record in the DDT
208 *  3. looking for matching checksums, and
209 *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
210 *      a duplicate block is found.
211 * The output of this function then goes to the output fd requested
212 * by the caller of zfs_send().
213 */
214static void *
215cksummer(void *arg)
216{
217	dedup_arg_t *dda = arg;
218	char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
219	dmu_replay_record_t thedrr;
220	dmu_replay_record_t *drr = &thedrr;
221	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
222	struct drr_end *drre = &thedrr.drr_u.drr_end;
223	struct drr_object *drro = &thedrr.drr_u.drr_object;
224	struct drr_write *drrw = &thedrr.drr_u.drr_write;
225	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
226	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
227	FILE *ofp;
228	int outfd;
229	dmu_replay_record_t wbr_drr = {0};
230	struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
231	dedup_table_t ddt;
232	zio_cksum_t stream_cksum;
233	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
234	uint64_t numbuckets;
235
236	ddt.max_ddt_size =
237	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
238	    SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
239
240	numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
241
242	/*
243	 * numbuckets must be a power of 2.  Increase number to
244	 * a power of 2 if necessary.
245	 */
246	if (!ISP2(numbuckets))
247		numbuckets = 1 << high_order_bit(numbuckets);
248
249	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
250	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
251	    NULL, NULL, NULL, NULL, NULL, 0);
252	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
253	ddt.numhashbits = high_order_bit(numbuckets) - 1;
254	ddt.ddt_full = B_FALSE;
255
256	/* Initialize the write-by-reference block. */
257	wbr_drr.drr_type = DRR_WRITE_BYREF;
258	wbr_drr.drr_payloadlen = 0;
259
260	outfd = dda->outputfd;
261	ofp = fdopen(dda->inputfd, "r");
262	while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
263
264		switch (drr->drr_type) {
265		case DRR_BEGIN:
266		{
267			int	fflags;
268			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
269
270			/* set the DEDUP feature flag for this stream */
271			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
272			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
273			    DMU_BACKUP_FEATURE_DEDUPPROPS);
274			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
275
276			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
277			    &stream_cksum, outfd) == -1)
278				goto out;
279			if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
280			    DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
281				int sz = drr->drr_payloadlen;
282
283				if (sz > SPA_MAXBLOCKSIZE) {
284					buf = zfs_realloc(dda->dedup_hdl, buf,
285					    SPA_MAXBLOCKSIZE, sz);
286				}
287				(void) ssread(buf, sz, ofp);
288				if (ferror(stdin))
289					perror("fread");
290				if (cksum_and_write(buf, sz, &stream_cksum,
291				    outfd) == -1)
292					goto out;
293			}
294			break;
295		}
296
297		case DRR_END:
298		{
299			/* use the recalculated checksum */
300			ZIO_SET_CHECKSUM(&drre->drr_checksum,
301			    stream_cksum.zc_word[0], stream_cksum.zc_word[1],
302			    stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
303			if ((write(outfd, drr,
304			    sizeof (dmu_replay_record_t))) == -1)
305				goto out;
306			break;
307		}
308
309		case DRR_OBJECT:
310		{
311			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
312			    &stream_cksum, outfd) == -1)
313				goto out;
314			if (drro->drr_bonuslen > 0) {
315				(void) ssread(buf,
316				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
317				    ofp);
318				if (cksum_and_write(buf,
319				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
320				    &stream_cksum, outfd) == -1)
321					goto out;
322			}
323			break;
324		}
325
326		case DRR_SPILL:
327		{
328			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
329			    &stream_cksum, outfd) == -1)
330				goto out;
331			(void) ssread(buf, drrs->drr_length, ofp);
332			if (cksum_and_write(buf, drrs->drr_length,
333			    &stream_cksum, outfd) == -1)
334				goto out;
335			break;
336		}
337
338		case DRR_FREEOBJECTS:
339		{
340			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
341			    &stream_cksum, outfd) == -1)
342				goto out;
343			break;
344		}
345
346		case DRR_WRITE:
347		{
348			dataref_t	dataref;
349
350			(void) ssread(buf, drrw->drr_length, ofp);
351
352			/*
353			 * Use the existing checksum if it's dedup-capable,
354			 * else calculate a SHA256 checksum for it.
355			 */
356
357			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
358			    zero_cksum) ||
359			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
360				SHA256_CTX	ctx;
361				zio_cksum_t	tmpsha256;
362
363				SHA256Init(&ctx);
364				SHA256Update(&ctx, buf, drrw->drr_length);
365				SHA256Final(&tmpsha256, &ctx);
366				drrw->drr_key.ddk_cksum.zc_word[0] =
367				    BE_64(tmpsha256.zc_word[0]);
368				drrw->drr_key.ddk_cksum.zc_word[1] =
369				    BE_64(tmpsha256.zc_word[1]);
370				drrw->drr_key.ddk_cksum.zc_word[2] =
371				    BE_64(tmpsha256.zc_word[2]);
372				drrw->drr_key.ddk_cksum.zc_word[3] =
373				    BE_64(tmpsha256.zc_word[3]);
374				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
375				drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
376			}
377
378			dataref.ref_guid = drrw->drr_toguid;
379			dataref.ref_object = drrw->drr_object;
380			dataref.ref_offset = drrw->drr_offset;
381
382			if (ddt_update(dda->dedup_hdl, &ddt,
383			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
384			    &dataref)) {
385				/* block already present in stream */
386				wbr_drrr->drr_object = drrw->drr_object;
387				wbr_drrr->drr_offset = drrw->drr_offset;
388				wbr_drrr->drr_length = drrw->drr_length;
389				wbr_drrr->drr_toguid = drrw->drr_toguid;
390				wbr_drrr->drr_refguid = dataref.ref_guid;
391				wbr_drrr->drr_refobject =
392				    dataref.ref_object;
393				wbr_drrr->drr_refoffset =
394				    dataref.ref_offset;
395
396				wbr_drrr->drr_checksumtype =
397				    drrw->drr_checksumtype;
398				wbr_drrr->drr_checksumflags =
399				    drrw->drr_checksumtype;
400				wbr_drrr->drr_key.ddk_cksum =
401				    drrw->drr_key.ddk_cksum;
402				wbr_drrr->drr_key.ddk_prop =
403				    drrw->drr_key.ddk_prop;
404
405				if (cksum_and_write(&wbr_drr,
406				    sizeof (dmu_replay_record_t), &stream_cksum,
407				    outfd) == -1)
408					goto out;
409			} else {
410				/* block not previously seen */
411				if (cksum_and_write(drr,
412				    sizeof (dmu_replay_record_t), &stream_cksum,
413				    outfd) == -1)
414					goto out;
415				if (cksum_and_write(buf,
416				    drrw->drr_length,
417				    &stream_cksum, outfd) == -1)
418					goto out;
419			}
420			break;
421		}
422
423		case DRR_WRITE_EMBEDDED:
424		{
425			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
426			    &stream_cksum, outfd) == -1)
427				goto out;
428			(void) ssread(buf,
429			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
430			if (cksum_and_write(buf,
431			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
432			    &stream_cksum, outfd) == -1)
433				goto out;
434			break;
435		}
436
437		case DRR_FREE:
438		{
439			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
440			    &stream_cksum, outfd) == -1)
441				goto out;
442			break;
443		}
444
445		default:
446			(void) printf("INVALID record type 0x%x\n",
447			    drr->drr_type);
448			/* should never happen, so assert */
449			assert(B_FALSE);
450		}
451	}
452out:
453	umem_cache_destroy(ddt.ddecache);
454	free(ddt.dedup_hash_array);
455	free(buf);
456	(void) fclose(ofp);
457
458	return (NULL);
459}
460
461/*
462 * Routines for dealing with the AVL tree of fs-nvlists
463 */
/*
 * One snapshot in the guid-ordered AVL tree built by fsavl_create().
 * fn_nvfs and fn_snapname point into the nvlist the tree was built from;
 * fsavl_destroy() frees only the node itself.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage */
	nvlist_t *fn_nvfs;	/* nvlist of the filesystem owning the snapshot */
	char *fn_snapname;	/* snapshot's name within the "snaps" nvlist */
	uint64_t fn_guid;	/* snapshot guid; the sort key */
} fsavl_node_t;
470
471static int
472fsavl_compare(const void *arg1, const void *arg2)
473{
474	const fsavl_node_t *fn1 = arg1;
475	const fsavl_node_t *fn2 = arg2;
476
477	if (fn1->fn_guid > fn2->fn_guid)
478		return (+1);
479	else if (fn1->fn_guid < fn2->fn_guid)
480		return (-1);
481	else
482		return (0);
483}
484
485/*
486 * Given the GUID of a snapshot, find its containing filesystem and
487 * (optionally) name.
488 */
489static nvlist_t *
490fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
491{
492	fsavl_node_t fn_find;
493	fsavl_node_t *fn;
494
495	fn_find.fn_guid = snapguid;
496
497	fn = avl_find(avl, &fn_find, NULL);
498	if (fn) {
499		if (snapname)
500			*snapname = fn->fn_snapname;
501		return (fn->fn_nvfs);
502	}
503	return (NULL);
504}
505
506static void
507fsavl_destroy(avl_tree_t *avl)
508{
509	fsavl_node_t *fn;
510	void *cookie;
511
512	if (avl == NULL)
513		return;
514
515	cookie = NULL;
516	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
517		free(fn);
518	avl_destroy(avl);
519	free(avl);
520}
521
522/*
523 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
524 */
525static avl_tree_t *
526fsavl_create(nvlist_t *fss)
527{
528	avl_tree_t *fsavl;
529	nvpair_t *fselem = NULL;
530
531	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
532		return (NULL);
533
534	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
535	    offsetof(fsavl_node_t, fn_node));
536
537	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
538		nvlist_t *nvfs, *snaps;
539		nvpair_t *snapelem = NULL;
540
541		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
542		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
543
544		while ((snapelem =
545		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
546			fsavl_node_t *fn;
547			uint64_t guid;
548
549			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
550			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
551				fsavl_destroy(fsavl);
552				return (NULL);
553			}
554			fn->fn_nvfs = nvfs;
555			fn->fn_snapname = nvpair_name(snapelem);
556			fn->fn_guid = guid;
557
558			/*
559			 * Note: if there are multiple snaps with the
560			 * same GUID, we ignore all but one.
561			 */
562			if (avl_find(fsavl, fn, NULL) == NULL)
563				avl_add(fsavl, fn);
564			else
565				free(fn);
566		}
567	}
568
569	return (fsavl);
570}
571
572/*
573 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
574 */
/*
 * State threaded through send_iterate_fs() and send_iterate_snap() while
 * gather_nvlist() builds the header nvlist.
 */
typedef struct send_data {
	/*
	 * guid recorded as "parentfromsnap" for child filesystems; set by
	 * send_iterate_snap() and saved/restored around each filesystem.
	 */
	uint64_t parent_fromsnap_guid;
	/* "snaps" nvlist of the filesystem currently being iterated */
	nvlist_t *parent_snaps;
	/* result: all filesystems, keyed by guid formatted as "0x%llx" */
	nvlist_t *fss;
	/* "snapprops" nvlist of the filesystem currently being iterated */
	nvlist_t *snapprops;
	/* short snapshot names to match against; either may be NULL */
	const char *fromsnap;
	const char *tosnap;
	/* descend into child filesystems */
	boolean_t recursive;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
607
608static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
609
610static int
611send_iterate_snap(zfs_handle_t *zhp, void *arg)
612{
613	send_data_t *sd = arg;
614	uint64_t guid = zhp->zfs_dmustats.dds_guid;
615	char *snapname;
616	nvlist_t *nv;
617
618	snapname = strrchr(zhp->zfs_name, '@')+1;
619
620	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
621	/*
622	 * NB: if there is no fromsnap here (it's a newly created fs in
623	 * an incremental replication), we will substitute the tosnap.
624	 */
625	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
626	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
627	    strcmp(snapname, sd->tosnap) == 0)) {
628		sd->parent_fromsnap_guid = guid;
629	}
630
631	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
632	send_iterate_prop(zhp, nv);
633	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
634	nvlist_free(nv);
635
636	zfs_close(zhp);
637	return (0);
638}
639
640static void
641send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
642{
643	nvpair_t *elem = NULL;
644
645	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
646		char *propname = nvpair_name(elem);
647		zfs_prop_t prop = zfs_name_to_prop(propname);
648		nvlist_t *propnv;
649
650		if (!zfs_prop_user(propname)) {
651			/*
652			 * Realistically, this should never happen.  However,
653			 * we want the ability to add DSL properties without
654			 * needing to make incompatible version changes.  We
655			 * need to ignore unknown properties to allow older
656			 * software to still send datasets containing these
657			 * properties, with the unknown properties elided.
658			 */
659			if (prop == ZPROP_INVAL)
660				continue;
661
662			if (zfs_prop_readonly(prop))
663				continue;
664		}
665
666		verify(nvpair_value_nvlist(elem, &propnv) == 0);
667		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
668		    prop == ZFS_PROP_REFQUOTA ||
669		    prop == ZFS_PROP_REFRESERVATION) {
670			char *source;
671			uint64_t value;
672			verify(nvlist_lookup_uint64(propnv,
673			    ZPROP_VALUE, &value) == 0);
674			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
675				continue;
676			/*
677			 * May have no source before SPA_VERSION_RECVD_PROPS,
678			 * but is still modifiable.
679			 */
680			if (nvlist_lookup_string(propnv,
681			    ZPROP_SOURCE, &source) == 0) {
682				if ((strcmp(source, zhp->zfs_name) != 0) &&
683				    (strcmp(source,
684				    ZPROP_SOURCE_VAL_RECVD) != 0))
685					continue;
686			}
687		} else {
688			char *source;
689			if (nvlist_lookup_string(propnv,
690			    ZPROP_SOURCE, &source) != 0)
691				continue;
692			if ((strcmp(source, zhp->zfs_name) != 0) &&
693			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
694				continue;
695		}
696
697		if (zfs_prop_user(propname) ||
698		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
699			char *value;
700			verify(nvlist_lookup_string(propnv,
701			    ZPROP_VALUE, &value) == 0);
702			VERIFY(0 == nvlist_add_string(nv, propname, value));
703		} else {
704			uint64_t value;
705			verify(nvlist_lookup_uint64(propnv,
706			    ZPROP_VALUE, &value) == 0);
707			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
708		}
709	}
710}
711
712/*
713 * recursively generate nvlists describing datasets.  See comment
714 * for the data structure send_data_t above for description of contents
715 * of the nvlist.
716 */
717static int
718send_iterate_fs(zfs_handle_t *zhp, void *arg)
719{
720	send_data_t *sd = arg;
721	nvlist_t *nvfs, *nv;
722	int rv = 0;
723	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
724	uint64_t guid = zhp->zfs_dmustats.dds_guid;
725	char guidstring[64];
726
727	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
728	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
729	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
730	    sd->parent_fromsnap_guid));
731
732	if (zhp->zfs_dmustats.dds_origin[0]) {
733		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
734		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
735		if (origin == NULL)
736			return (-1);
737		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
738		    origin->zfs_dmustats.dds_guid));
739	}
740
741	/* iterate over props */
742	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
743	send_iterate_prop(zhp, nv);
744	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
745	nvlist_free(nv);
746
747	/* iterate over snaps, and set sd->parent_fromsnap_guid */
748	sd->parent_fromsnap_guid = 0;
749	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
750	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
751	(void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
752	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
753	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
754	nvlist_free(sd->parent_snaps);
755	nvlist_free(sd->snapprops);
756
757	/* add this fs to nvlist */
758	(void) snprintf(guidstring, sizeof (guidstring),
759	    "0x%llx", (longlong_t)guid);
760	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
761	nvlist_free(nvfs);
762
763	/* iterate over children */
764	if (sd->recursive)
765		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
766
767	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
768
769	zfs_close(zhp);
770	return (rv);
771}
772
773static int
774gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
775    const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
776{
777	zfs_handle_t *zhp;
778	send_data_t sd = { 0 };
779	int error;
780
781	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
782	if (zhp == NULL)
783		return (EZFS_BADTYPE);
784
785	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
786	sd.fromsnap = fromsnap;
787	sd.tosnap = tosnap;
788	sd.recursive = recursive;
789
790	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
791		nvlist_free(sd.fss);
792		if (avlp != NULL)
793			*avlp = NULL;
794		*nvlp = NULL;
795		return (error);
796	}
797
798	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
799		nvlist_free(sd.fss);
800		*nvlp = NULL;
801		return (EZFS_NOMEM);
802	}
803
804	*nvlp = sd.fss;
805	return (0);
806}
807
808/*
809 * Routines specific to "zfs send"
810 */
/*
 * State carried through dump_snapshot() and its helpers for one send.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAXNAMELEN];
	/* ZFS_PROP_OBJSETID of prevsnap, as recorded by dump_snapshot() */
	uint64_t prevsnap_obj;
	/* seenfrom/seento track progress through the snapshot range */
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	/* output and stream options consulted by dump_snapshot() */
	boolean_t verbose, dryrun, parsable, progress, embed_data, large_block;
	/* descriptor the stream is written to */
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/* snapshot name -> hold tag; NULL when no holds are wanted */
	nvlist_t *snapholds;
	/* guid-ordered snapshot tree searched with fsavl_find() */
	avl_tree_t *fsavl;
	/* optional filter; returning B_FALSE excludes the snapshot */
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	/* optional; receives one debug nvlist per snapshot sent */
	nvlist_t *debugnv;
	/* tag recorded in snapholds by gather_holds() */
	char holdtag[ZFS_MAXNAMELEN];
	int cleanup_fd;
	/* running total of the per-snapshot size estimates */
	uint64_t size;
} send_dump_data_t;
831
832static int
833estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
834    boolean_t fromorigin, uint64_t *sizep)
835{
836	zfs_cmd_t zc = { 0 };
837	libzfs_handle_t *hdl = zhp->zfs_hdl;
838
839	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
840	assert(fromsnap_obj == 0 || !fromorigin);
841
842	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
843	zc.zc_obj = fromorigin;
844	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
845	zc.zc_fromobj = fromsnap_obj;
846	zc.zc_guid = 1;  /* estimate flag */
847
848	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
849		char errbuf[1024];
850		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
851		    "warning: cannot estimate space for '%s'"), zhp->zfs_name);
852
853		switch (errno) {
854		case EXDEV:
855			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
856			    "not an earlier snapshot from the same fs"));
857			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
858
859		case ENOENT:
860			if (zfs_dataset_exists(hdl, zc.zc_name,
861			    ZFS_TYPE_SNAPSHOT)) {
862				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
863				    "incremental source (@%s) does not exist"),
864				    zc.zc_value);
865			}
866			return (zfs_error(hdl, EZFS_NOENT, errbuf));
867
868		case EDQUOT:
869		case EFBIG:
870		case EIO:
871		case ENOLINK:
872		case ENOSPC:
873		case ENXIO:
874		case EPIPE:
875		case ERANGE:
876		case EFAULT:
877		case EROFS:
878			zfs_error_aux(hdl, strerror(errno));
879			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
880
881		default:
882			return (zfs_standard_error(hdl, errno, errbuf));
883		}
884	}
885
886	*sizep = zc.zc_objset_type;
887
888	return (0);
889}
890
891/*
892 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
893 * NULL) to the file descriptor specified by outfd.
894 */
895static int
896dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
897    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
898    nvlist_t *debugnv)
899{
900	zfs_cmd_t zc = { 0 };
901	libzfs_handle_t *hdl = zhp->zfs_hdl;
902	nvlist_t *thisdbg;
903
904	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
905	assert(fromsnap_obj == 0 || !fromorigin);
906
907	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
908	zc.zc_cookie = outfd;
909	zc.zc_obj = fromorigin;
910	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
911	zc.zc_fromobj = fromsnap_obj;
912	zc.zc_flags = flags;
913
914	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
915	if (fromsnap && fromsnap[0] != '\0') {
916		VERIFY(0 == nvlist_add_string(thisdbg,
917		    "fromsnap", fromsnap));
918	}
919
920	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
921		char errbuf[1024];
922		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
923		    "warning: cannot send '%s'"), zhp->zfs_name);
924
925		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
926		if (debugnv) {
927			VERIFY(0 == nvlist_add_nvlist(debugnv,
928			    zhp->zfs_name, thisdbg));
929		}
930		nvlist_free(thisdbg);
931
932		switch (errno) {
933		case EXDEV:
934			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
935			    "not an earlier snapshot from the same fs"));
936			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
937
938		case ENOENT:
939			if (zfs_dataset_exists(hdl, zc.zc_name,
940			    ZFS_TYPE_SNAPSHOT)) {
941				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
942				    "incremental source (@%s) does not exist"),
943				    zc.zc_value);
944			}
945			return (zfs_error(hdl, EZFS_NOENT, errbuf));
946
947		case EDQUOT:
948		case EFBIG:
949		case EIO:
950		case ENOLINK:
951		case ENOSPC:
952#ifdef sun
953		case ENOSTR:
954#endif
955		case ENXIO:
956		case EPIPE:
957		case ERANGE:
958		case EFAULT:
959		case EROFS:
960			zfs_error_aux(hdl, strerror(errno));
961			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
962
963		default:
964			return (zfs_standard_error(hdl, errno, errbuf));
965		}
966	}
967
968	if (debugnv)
969		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
970	nvlist_free(thisdbg);
971
972	return (0);
973}
974
975static void
976gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
977{
978	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
979
980	/*
981	 * zfs_send() only sets snapholds for sends that need them,
982	 * e.g. replication and doall.
983	 */
984	if (sdd->snapholds == NULL)
985		return;
986
987	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
988}
989
990static void *
991send_progress_thread(void *arg)
992{
993	progress_arg_t *pa = arg;
994
995	zfs_cmd_t zc = { 0 };
996	zfs_handle_t *zhp = pa->pa_zhp;
997	libzfs_handle_t *hdl = zhp->zfs_hdl;
998	unsigned long long bytes;
999	char buf[16];
1000
1001	time_t t;
1002	struct tm *tm;
1003
1004	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1005	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1006
1007	if (!pa->pa_parsable)
1008		(void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1009
1010	/*
1011	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1012	 */
1013	for (;;) {
1014		(void) sleep(1);
1015
1016		zc.zc_cookie = pa->pa_fd;
1017		if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1018			return ((void *)-1);
1019
1020		(void) time(&t);
1021		tm = localtime(&t);
1022		bytes = zc.zc_cookie;
1023
1024		if (pa->pa_parsable) {
1025			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1026			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1027			    bytes, zhp->zfs_name);
1028		} else {
1029			zfs_nicenum(bytes, buf, sizeof (buf));
1030			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1031			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1032			    buf, zhp->zfs_name);
1033		}
1034	}
1035}
1036
1037static int
1038dump_snapshot(zfs_handle_t *zhp, void *arg)
1039{
1040	send_dump_data_t *sdd = arg;
1041	progress_arg_t pa = { 0 };
1042	pthread_t tid;
1043	char *thissnap;
1044	int err;
1045	boolean_t isfromsnap, istosnap, fromorigin;
1046	boolean_t exclude = B_FALSE;
1047
1048	err = 0;
1049	thissnap = strchr(zhp->zfs_name, '@') + 1;
1050	isfromsnap = (sdd->fromsnap != NULL &&
1051	    strcmp(sdd->fromsnap, thissnap) == 0);
1052
1053	if (!sdd->seenfrom && isfromsnap) {
1054		gather_holds(zhp, sdd);
1055		sdd->seenfrom = B_TRUE;
1056		(void) strcpy(sdd->prevsnap, thissnap);
1057		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1058		zfs_close(zhp);
1059		return (0);
1060	}
1061
1062	if (sdd->seento || !sdd->seenfrom) {
1063		zfs_close(zhp);
1064		return (0);
1065	}
1066
1067	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1068	if (istosnap)
1069		sdd->seento = B_TRUE;
1070
1071	if (!sdd->doall && !isfromsnap && !istosnap) {
1072		if (sdd->replicate) {
1073			char *snapname;
1074			nvlist_t *snapprops;
1075			/*
1076			 * Filter out all intermediate snapshots except origin
1077			 * snapshots needed to replicate clones.
1078			 */
1079			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1080			    zhp->zfs_dmustats.dds_guid, &snapname);
1081
1082			VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1083			    "snapprops", &snapprops));
1084			VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1085			    thissnap, &snapprops));
1086			exclude = !nvlist_exists(snapprops, "is_clone_origin");
1087		} else {
1088			exclude = B_TRUE;
1089		}
1090	}
1091
1092	/*
1093	 * If a filter function exists, call it to determine whether
1094	 * this snapshot will be sent.
1095	 */
1096	if (exclude || (sdd->filter_cb != NULL &&
1097	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1098		/*
1099		 * This snapshot is filtered out.  Don't send it, and don't
1100		 * set prevsnap_obj, so it will be as if this snapshot didn't
1101		 * exist, and the next accepted snapshot will be sent as
1102		 * an incremental from the last accepted one, or as the
1103		 * first (and full) snapshot in the case of a replication,
1104		 * non-incremental send.
1105		 */
1106		zfs_close(zhp);
1107		return (0);
1108	}
1109
1110	gather_holds(zhp, sdd);
1111	fromorigin = sdd->prevsnap[0] == '\0' &&
1112	    (sdd->fromorigin || sdd->replicate);
1113
1114	if (sdd->verbose) {
1115		uint64_t size;
1116		err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1117		    fromorigin, &size);
1118
1119		if (sdd->parsable) {
1120			if (sdd->prevsnap[0] != '\0') {
1121				(void) fprintf(stderr, "incremental\t%s\t%s",
1122				    sdd->prevsnap, zhp->zfs_name);
1123			} else {
1124				(void) fprintf(stderr, "full\t%s",
1125				    zhp->zfs_name);
1126			}
1127		} else {
1128			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1129			    "send from @%s to %s"),
1130			    sdd->prevsnap, zhp->zfs_name);
1131		}
1132		if (err == 0) {
1133			if (sdd->parsable) {
1134				(void) fprintf(stderr, "\t%llu\n",
1135				    (longlong_t)size);
1136			} else {
1137				char buf[16];
1138				zfs_nicenum(size, buf, sizeof (buf));
1139				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1140				    " estimated size is %s\n"), buf);
1141			}
1142			sdd->size += size;
1143		} else {
1144			(void) fprintf(stderr, "\n");
1145		}
1146	}
1147
1148	if (!sdd->dryrun) {
1149		/*
1150		 * If progress reporting is requested, spawn a new thread to
1151		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1152		 */
1153		if (sdd->progress) {
1154			pa.pa_zhp = zhp;
1155			pa.pa_fd = sdd->outfd;
1156			pa.pa_parsable = sdd->parsable;
1157
1158			if (err = pthread_create(&tid, NULL,
1159			    send_progress_thread, &pa)) {
1160				zfs_close(zhp);
1161				return (err);
1162			}
1163		}
1164
1165		enum lzc_send_flags flags = 0;
1166		if (sdd->large_block)
1167			flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1168		if (sdd->embed_data)
1169			flags |= LZC_SEND_FLAG_EMBED_DATA;
1170
1171		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1172		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1173
1174		if (sdd->progress) {
1175			(void) pthread_cancel(tid);
1176			(void) pthread_join(tid, NULL);
1177		}
1178	}
1179
1180	(void) strcpy(sdd->prevsnap, thissnap);
1181	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1182	zfs_close(zhp);
1183	return (err);
1184}
1185
1186static int
1187dump_filesystem(zfs_handle_t *zhp, void *arg)
1188{
1189	int rv = 0;
1190	send_dump_data_t *sdd = arg;
1191	boolean_t missingfrom = B_FALSE;
1192	zfs_cmd_t zc = { 0 };
1193
1194	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1195	    zhp->zfs_name, sdd->tosnap);
1196	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1197		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1198		    "WARNING: could not send %s@%s: does not exist\n"),
1199		    zhp->zfs_name, sdd->tosnap);
1200		sdd->err = B_TRUE;
1201		return (0);
1202	}
1203
1204	if (sdd->replicate && sdd->fromsnap) {
1205		/*
1206		 * If this fs does not have fromsnap, and we're doing
1207		 * recursive, we need to send a full stream from the
1208		 * beginning (or an incremental from the origin if this
1209		 * is a clone).  If we're doing non-recursive, then let
1210		 * them get the error.
1211		 */
1212		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1213		    zhp->zfs_name, sdd->fromsnap);
1214		if (ioctl(zhp->zfs_hdl->libzfs_fd,
1215		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1216			missingfrom = B_TRUE;
1217		}
1218	}
1219
1220	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1221	sdd->prevsnap_obj = 0;
1222	if (sdd->fromsnap == NULL || missingfrom)
1223		sdd->seenfrom = B_TRUE;
1224
1225	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1226	if (!sdd->seenfrom) {
1227		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1228		    "WARNING: could not send %s@%s:\n"
1229		    "incremental source (%s@%s) does not exist\n"),
1230		    zhp->zfs_name, sdd->tosnap,
1231		    zhp->zfs_name, sdd->fromsnap);
1232		sdd->err = B_TRUE;
1233	} else if (!sdd->seento) {
1234		if (sdd->fromsnap) {
1235			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1236			    "WARNING: could not send %s@%s:\n"
1237			    "incremental source (%s@%s) "
1238			    "is not earlier than it\n"),
1239			    zhp->zfs_name, sdd->tosnap,
1240			    zhp->zfs_name, sdd->fromsnap);
1241		} else {
1242			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1243			    "WARNING: "
1244			    "could not send %s@%s: does not exist\n"),
1245			    zhp->zfs_name, sdd->tosnap);
1246		}
1247		sdd->err = B_TRUE;
1248	}
1249
1250	return (rv);
1251}
1252
1253static int
1254dump_filesystems(zfs_handle_t *rzhp, void *arg)
1255{
1256	send_dump_data_t *sdd = arg;
1257	nvpair_t *fspair;
1258	boolean_t needagain, progress;
1259
1260	if (!sdd->replicate)
1261		return (dump_filesystem(rzhp, sdd));
1262
1263	/* Mark the clone origin snapshots. */
1264	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1265	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1266		nvlist_t *nvfs;
1267		uint64_t origin_guid = 0;
1268
1269		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1270		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1271		if (origin_guid != 0) {
1272			char *snapname;
1273			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1274			    origin_guid, &snapname);
1275			if (origin_nv != NULL) {
1276				nvlist_t *snapprops;
1277				VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1278				    "snapprops", &snapprops));
1279				VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1280				    snapname, &snapprops));
1281				VERIFY(0 == nvlist_add_boolean(
1282				    snapprops, "is_clone_origin"));
1283			}
1284		}
1285	}
1286again:
1287	needagain = progress = B_FALSE;
1288	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1289	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1290		nvlist_t *fslist, *parent_nv;
1291		char *fsname;
1292		zfs_handle_t *zhp;
1293		int err;
1294		uint64_t origin_guid = 0;
1295		uint64_t parent_guid = 0;
1296
1297		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1298		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1299			continue;
1300
1301		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1302		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1303		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1304		    &parent_guid);
1305
1306		if (parent_guid != 0) {
1307			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1308			if (!nvlist_exists(parent_nv, "sent")) {
1309				/* parent has not been sent; skip this one */
1310				needagain = B_TRUE;
1311				continue;
1312			}
1313		}
1314
1315		if (origin_guid != 0) {
1316			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1317			    origin_guid, NULL);
1318			if (origin_nv != NULL &&
1319			    !nvlist_exists(origin_nv, "sent")) {
1320				/*
1321				 * origin has not been sent yet;
1322				 * skip this clone.
1323				 */
1324				needagain = B_TRUE;
1325				continue;
1326			}
1327		}
1328
1329		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1330		if (zhp == NULL)
1331			return (-1);
1332		err = dump_filesystem(zhp, sdd);
1333		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1334		progress = B_TRUE;
1335		zfs_close(zhp);
1336		if (err)
1337			return (err);
1338	}
1339	if (needagain) {
1340		assert(progress);
1341		goto again;
1342	}
1343
1344	/* clean out the sent flags in case we reuse this fss */
1345	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1346	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1347		nvlist_t *fslist;
1348
1349		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1350		(void) nvlist_remove_all(fslist, "sent");
1351	}
1352
1353	return (0);
1354}
1355
1356/*
1357 * Generate a send stream for the dataset identified by the argument zhp.
1358 *
1359 * The content of the send stream is the snapshot identified by
1360 * 'tosnap'.  Incremental streams are requested in two ways:
1361 *     - from the snapshot identified by "fromsnap" (if non-null) or
1362 *     - from the origin of the dataset identified by zhp, which must
1363 *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
1364 *	 is TRUE.
1365 *
1366 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1367 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1368 * if "replicate" is set.  If "doall" is set, dump all the intermediate
1369 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1370 * case too. If "props" is set, send properties.
1371 */
1372int
1373zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1374    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1375    void *cb_arg, nvlist_t **debugnvp)
1376{
1377	char errbuf[1024];
1378	send_dump_data_t sdd = { 0 };
1379	int err = 0;
1380	nvlist_t *fss = NULL;
1381	avl_tree_t *fsavl = NULL;
1382	static uint64_t holdseq;
1383	int spa_version;
1384	pthread_t tid = 0;
1385	int pipefd[2];
1386	dedup_arg_t dda = { 0 };
1387	int featureflags = 0;
1388
1389	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1390	    "cannot send '%s'"), zhp->zfs_name);
1391
1392	if (fromsnap && fromsnap[0] == '\0') {
1393		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1394		    "zero-length incremental source"));
1395		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1396	}
1397
1398	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1399		uint64_t version;
1400		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1401		if (version >= ZPL_VERSION_SA) {
1402			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1403		}
1404	}
1405
1406	if (flags->dedup && !flags->dryrun) {
1407		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1408		    DMU_BACKUP_FEATURE_DEDUPPROPS);
1409		if (err = pipe(pipefd)) {
1410			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1411			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1412			    errbuf));
1413		}
1414		dda.outputfd = outfd;
1415		dda.inputfd = pipefd[1];
1416		dda.dedup_hdl = zhp->zfs_hdl;
1417		if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1418			(void) close(pipefd[0]);
1419			(void) close(pipefd[1]);
1420			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1421			return (zfs_error(zhp->zfs_hdl,
1422			    EZFS_THREADCREATEFAILED, errbuf));
1423		}
1424	}
1425
1426	if (flags->replicate || flags->doall || flags->props) {
1427		dmu_replay_record_t drr = { 0 };
1428		char *packbuf = NULL;
1429		size_t buflen = 0;
1430		zio_cksum_t zc = { 0 };
1431
1432		if (flags->replicate || flags->props) {
1433			nvlist_t *hdrnv;
1434
1435			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1436			if (fromsnap) {
1437				VERIFY(0 == nvlist_add_string(hdrnv,
1438				    "fromsnap", fromsnap));
1439			}
1440			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1441			if (!flags->replicate) {
1442				VERIFY(0 == nvlist_add_boolean(hdrnv,
1443				    "not_recursive"));
1444			}
1445
1446			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1447			    fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1448			if (err)
1449				goto err_out;
1450			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1451			err = nvlist_pack(hdrnv, &packbuf, &buflen,
1452			    NV_ENCODE_XDR, 0);
1453			if (debugnvp)
1454				*debugnvp = hdrnv;
1455			else
1456				nvlist_free(hdrnv);
1457			if (err)
1458				goto stderr_out;
1459		}
1460
1461		if (!flags->dryrun) {
1462			/* write first begin record */
1463			drr.drr_type = DRR_BEGIN;
1464			drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1465			DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1466			    drr_versioninfo, DMU_COMPOUNDSTREAM);
1467			DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1468			    drr_versioninfo, featureflags);
1469			(void) snprintf(drr.drr_u.drr_begin.drr_toname,
1470			    sizeof (drr.drr_u.drr_begin.drr_toname),
1471			    "%s@%s", zhp->zfs_name, tosnap);
1472			drr.drr_payloadlen = buflen;
1473			err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1474
1475			/* write header nvlist */
1476			if (err != -1 && packbuf != NULL) {
1477				err = cksum_and_write(packbuf, buflen, &zc,
1478				    outfd);
1479			}
1480			free(packbuf);
1481			if (err == -1) {
1482				err = errno;
1483				goto stderr_out;
1484			}
1485
1486			/* write end record */
1487			bzero(&drr, sizeof (drr));
1488			drr.drr_type = DRR_END;
1489			drr.drr_u.drr_end.drr_checksum = zc;
1490			err = write(outfd, &drr, sizeof (drr));
1491			if (err == -1) {
1492				err = errno;
1493				goto stderr_out;
1494			}
1495
1496			err = 0;
1497		}
1498	}
1499
1500	/* dump each stream */
1501	sdd.fromsnap = fromsnap;
1502	sdd.tosnap = tosnap;
1503	if (tid != 0)
1504		sdd.outfd = pipefd[0];
1505	else
1506		sdd.outfd = outfd;
1507	sdd.replicate = flags->replicate;
1508	sdd.doall = flags->doall;
1509	sdd.fromorigin = flags->fromorigin;
1510	sdd.fss = fss;
1511	sdd.fsavl = fsavl;
1512	sdd.verbose = flags->verbose;
1513	sdd.parsable = flags->parsable;
1514	sdd.progress = flags->progress;
1515	sdd.dryrun = flags->dryrun;
1516	sdd.large_block = flags->largeblock;
1517	sdd.embed_data = flags->embed_data;
1518	sdd.filter_cb = filter_func;
1519	sdd.filter_cb_arg = cb_arg;
1520	if (debugnvp)
1521		sdd.debugnv = *debugnvp;
1522
1523	/*
1524	 * Some flags require that we place user holds on the datasets that are
1525	 * being sent so they don't get destroyed during the send. We can skip
1526	 * this step if the pool is imported read-only since the datasets cannot
1527	 * be destroyed.
1528	 */
1529	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1530	    ZPOOL_PROP_READONLY, NULL) &&
1531	    zfs_spa_version(zhp, &spa_version) == 0 &&
1532	    spa_version >= SPA_VERSION_USERREFS &&
1533	    (flags->doall || flags->replicate)) {
1534		++holdseq;
1535		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1536		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1537		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1538		if (sdd.cleanup_fd < 0) {
1539			err = errno;
1540			goto stderr_out;
1541		}
1542		sdd.snapholds = fnvlist_alloc();
1543	} else {
1544		sdd.cleanup_fd = -1;
1545		sdd.snapholds = NULL;
1546	}
1547	if (flags->verbose || sdd.snapholds != NULL) {
1548		/*
1549		 * Do a verbose no-op dry run to get all the verbose output
1550		 * or to gather snapshot hold's before generating any data,
1551		 * then do a non-verbose real run to generate the streams.
1552		 */
1553		sdd.dryrun = B_TRUE;
1554		err = dump_filesystems(zhp, &sdd);
1555
1556		if (err != 0)
1557			goto stderr_out;
1558
1559		if (flags->verbose) {
1560			if (flags->parsable) {
1561				(void) fprintf(stderr, "size\t%llu\n",
1562				    (longlong_t)sdd.size);
1563			} else {
1564				char buf[16];
1565				zfs_nicenum(sdd.size, buf, sizeof (buf));
1566				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1567				    "total estimated size is %s\n"), buf);
1568			}
1569		}
1570
1571		/* Ensure no snaps found is treated as an error. */
1572		if (!sdd.seento) {
1573			err = ENOENT;
1574			goto err_out;
1575		}
1576
1577		/* Skip the second run if dryrun was requested. */
1578		if (flags->dryrun)
1579			goto err_out;
1580
1581		if (sdd.snapholds != NULL) {
1582			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1583			if (err != 0)
1584				goto stderr_out;
1585
1586			fnvlist_free(sdd.snapholds);
1587			sdd.snapholds = NULL;
1588		}
1589
1590		sdd.dryrun = B_FALSE;
1591		sdd.verbose = B_FALSE;
1592	}
1593
1594	err = dump_filesystems(zhp, &sdd);
1595	fsavl_destroy(fsavl);
1596	nvlist_free(fss);
1597
1598	/* Ensure no snaps found is treated as an error. */
1599	if (err == 0 && !sdd.seento)
1600		err = ENOENT;
1601
1602	if (tid != 0) {
1603		if (err != 0)
1604			(void) pthread_cancel(tid);
1605		(void) close(pipefd[0]);
1606		(void) pthread_join(tid, NULL);
1607	}
1608
1609	if (sdd.cleanup_fd != -1) {
1610		VERIFY(0 == close(sdd.cleanup_fd));
1611		sdd.cleanup_fd = -1;
1612	}
1613
1614	if (!flags->dryrun && (flags->replicate || flags->doall ||
1615	    flags->props)) {
1616		/*
1617		 * write final end record.  NB: want to do this even if
1618		 * there was some error, because it might not be totally
1619		 * failed.
1620		 */
1621		dmu_replay_record_t drr = { 0 };
1622		drr.drr_type = DRR_END;
1623		if (write(outfd, &drr, sizeof (drr)) == -1) {
1624			return (zfs_standard_error(zhp->zfs_hdl,
1625			    errno, errbuf));
1626		}
1627	}
1628
1629	return (err || sdd.err);
1630
1631stderr_out:
1632	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1633err_out:
1634	fsavl_destroy(fsavl);
1635	nvlist_free(fss);
1636	fnvlist_free(sdd.snapholds);
1637
1638	if (sdd.cleanup_fd != -1)
1639		VERIFY(0 == close(sdd.cleanup_fd));
1640	if (tid != 0) {
1641		(void) pthread_cancel(tid);
1642		(void) close(pipefd[0]);
1643		(void) pthread_join(tid, NULL);
1644	}
1645	return (err);
1646}
1647
1648int
1649zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
1650    enum lzc_send_flags flags)
1651{
1652	int err;
1653	libzfs_handle_t *hdl = zhp->zfs_hdl;
1654
1655	char errbuf[1024];
1656	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1657	    "warning: cannot send '%s'"), zhp->zfs_name);
1658
1659	err = lzc_send(zhp->zfs_name, from, fd, flags);
1660	if (err != 0) {
1661		switch (errno) {
1662		case EXDEV:
1663			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1664			    "not an earlier snapshot from the same fs"));
1665			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1666
1667		case ENOENT:
1668		case ESRCH:
1669			if (lzc_exists(zhp->zfs_name)) {
1670				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1671				    "incremental source (%s) does not exist"),
1672				    from);
1673			}
1674			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1675
1676		case EBUSY:
1677			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1678			    "target is busy; if a filesystem, "
1679			    "it must not be mounted"));
1680			return (zfs_error(hdl, EZFS_BUSY, errbuf));
1681
1682		case EDQUOT:
1683		case EFBIG:
1684		case EIO:
1685		case ENOLINK:
1686		case ENOSPC:
1687#ifdef illumos
1688		case ENOSTR:
1689#endif
1690		case ENXIO:
1691		case EPIPE:
1692		case ERANGE:
1693		case EFAULT:
1694		case EROFS:
1695			zfs_error_aux(hdl, strerror(errno));
1696			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1697
1698		default:
1699			return (zfs_standard_error(hdl, errno, errbuf));
1700		}
1701	}
1702	return (err != 0);
1703}
1704
1705/*
1706 * Routines specific to "zfs recv"
1707 */
1708
1709static int
1710recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1711    boolean_t byteswap, zio_cksum_t *zc)
1712{
1713	char *cp = buf;
1714	int rv;
1715	int len = ilen;
1716
1717	do {
1718		rv = read(fd, cp, len);
1719		cp += rv;
1720		len -= rv;
1721	} while (rv > 0);
1722
1723	if (rv < 0 || len != 0) {
1724		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1725		    "failed to read from stream"));
1726		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1727		    "cannot receive")));
1728	}
1729
1730	if (zc) {
1731		if (byteswap)
1732			fletcher_4_incremental_byteswap(buf, ilen, zc);
1733		else
1734			fletcher_4_incremental_native(buf, ilen, zc);
1735	}
1736	return (0);
1737}
1738
1739static int
1740recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1741    boolean_t byteswap, zio_cksum_t *zc)
1742{
1743	char *buf;
1744	int err;
1745
1746	buf = zfs_alloc(hdl, len);
1747	if (buf == NULL)
1748		return (ENOMEM);
1749
1750	err = recv_read(hdl, fd, buf, len, byteswap, zc);
1751	if (err != 0) {
1752		free(buf);
1753		return (err);
1754	}
1755
1756	err = nvlist_unpack(buf, len, nvp, 0);
1757	free(buf);
1758	if (err != 0) {
1759		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1760		    "stream (malformed nvlist)"));
1761		return (EINVAL);
1762	}
1763	return (0);
1764}
1765
1766static int
1767recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1768    int baselen, char *newname, recvflags_t *flags)
1769{
1770	static int seq;
1771	zfs_cmd_t zc = { 0 };
1772	int err;
1773	prop_changelist_t *clp;
1774	zfs_handle_t *zhp;
1775
1776	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1777	if (zhp == NULL)
1778		return (-1);
1779	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1780	    flags->force ? MS_FORCE : 0);
1781	zfs_close(zhp);
1782	if (clp == NULL)
1783		return (-1);
1784	err = changelist_prefix(clp);
1785	if (err)
1786		return (err);
1787
1788	zc.zc_objset_type = DMU_OST_ZFS;
1789	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1790
1791	if (tryname) {
1792		(void) strcpy(newname, tryname);
1793
1794		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1795
1796		if (flags->verbose) {
1797			(void) printf("attempting rename %s to %s\n",
1798			    zc.zc_name, zc.zc_value);
1799		}
1800		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1801		if (err == 0)
1802			changelist_rename(clp, name, tryname);
1803	} else {
1804		err = ENOENT;
1805	}
1806
1807	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
1808		seq++;
1809
1810		(void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
1811		    baselen, name, getpid(), seq);
1812		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1813
1814		if (flags->verbose) {
1815			(void) printf("failed - trying rename %s to %s\n",
1816			    zc.zc_name, zc.zc_value);
1817		}
1818		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1819		if (err == 0)
1820			changelist_rename(clp, name, newname);
1821		if (err && flags->verbose) {
1822			(void) printf("failed (%u) - "
1823			    "will try again on next pass\n", errno);
1824		}
1825		err = EAGAIN;
1826	} else if (flags->verbose) {
1827		if (err == 0)
1828			(void) printf("success\n");
1829		else
1830			(void) printf("failed (%u)\n", errno);
1831	}
1832
1833	(void) changelist_postfix(clp);
1834	changelist_free(clp);
1835
1836	return (err);
1837}
1838
1839static int
1840recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1841    char *newname, recvflags_t *flags)
1842{
1843	zfs_cmd_t zc = { 0 };
1844	int err = 0;
1845	prop_changelist_t *clp;
1846	zfs_handle_t *zhp;
1847	boolean_t defer = B_FALSE;
1848	int spa_version;
1849
1850	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1851	if (zhp == NULL)
1852		return (-1);
1853	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1854	    flags->force ? MS_FORCE : 0);
1855	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1856	    zfs_spa_version(zhp, &spa_version) == 0 &&
1857	    spa_version >= SPA_VERSION_USERREFS)
1858		defer = B_TRUE;
1859	zfs_close(zhp);
1860	if (clp == NULL)
1861		return (-1);
1862	err = changelist_prefix(clp);
1863	if (err)
1864		return (err);
1865
1866	zc.zc_objset_type = DMU_OST_ZFS;
1867	zc.zc_defer_destroy = defer;
1868	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1869
1870	if (flags->verbose)
1871		(void) printf("attempting destroy %s\n", zc.zc_name);
1872	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1873	if (err == 0) {
1874		if (flags->verbose)
1875			(void) printf("success\n");
1876		changelist_remove(clp, zc.zc_name);
1877	}
1878
1879	(void) changelist_postfix(clp);
1880	changelist_free(clp);
1881
1882	/*
1883	 * Deferred destroy might destroy the snapshot or only mark it to be
1884	 * destroyed later, and it returns success in either case.
1885	 */
1886	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1887	    ZFS_TYPE_SNAPSHOT))) {
1888		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1889	}
1890
1891	return (err);
1892}
1893
1894typedef struct guid_to_name_data {
1895	uint64_t guid;
1896	char *name;
1897	char *skip;
1898} guid_to_name_data_t;
1899
1900static int
1901guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1902{
1903	guid_to_name_data_t *gtnd = arg;
1904	int err;
1905
1906	if (gtnd->skip != NULL &&
1907	    strcmp(zhp->zfs_name, gtnd->skip) == 0) {
1908		return (0);
1909	}
1910
1911	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1912		(void) strcpy(gtnd->name, zhp->zfs_name);
1913		zfs_close(zhp);
1914		return (EEXIST);
1915	}
1916
1917	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1918	zfs_close(zhp);
1919	return (err);
1920}
1921
1922/*
1923 * Attempt to find the local dataset associated with this guid.  In the case of
1924 * multiple matches, we attempt to find the "best" match by searching
1925 * progressively larger portions of the hierarchy.  This allows one to send a
1926 * tree of datasets individually and guarantee that we will find the source
1927 * guid within that hierarchy, even if there are multiple matches elsewhere.
1928 */
1929static int
1930guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1931    char *name)
1932{
1933	/* exhaustive search all local snapshots */
1934	char pname[ZFS_MAXNAMELEN];
1935	guid_to_name_data_t gtnd;
1936	int err = 0;
1937	zfs_handle_t *zhp;
1938	char *cp;
1939
1940	gtnd.guid = guid;
1941	gtnd.name = name;
1942	gtnd.skip = NULL;
1943
1944	(void) strlcpy(pname, parent, sizeof (pname));
1945
1946	/*
1947	 * Search progressively larger portions of the hierarchy.  This will
1948	 * select the "most local" version of the origin snapshot in the case
1949	 * that there are multiple matching snapshots in the system.
1950	 */
1951	while ((cp = strrchr(pname, '/')) != NULL) {
1952
1953		/* Chop off the last component and open the parent */
1954		*cp = '\0';
1955		zhp = make_dataset_handle(hdl, pname);
1956
1957		if (zhp == NULL)
1958			continue;
1959
1960		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1961		zfs_close(zhp);
1962		if (err == EEXIST)
1963			return (0);
1964
1965		/*
1966		 * Remember the dataset that we already searched, so we
1967		 * skip it next time through.
1968		 */
1969		gtnd.skip = pname;
1970	}
1971
1972	return (ENOENT);
1973}
1974
1975/*
1976 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
1977 * guid1 is after guid2.
1978 */
1979static int
1980created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1981    uint64_t guid1, uint64_t guid2)
1982{
1983	nvlist_t *nvfs;
1984	char *fsname, *snapname;
1985	char buf[ZFS_MAXNAMELEN];
1986	int rv;
1987	zfs_handle_t *guid1hdl, *guid2hdl;
1988	uint64_t create1, create2;
1989
1990	if (guid2 == 0)
1991		return (0);
1992	if (guid1 == 0)
1993		return (1);
1994
1995	nvfs = fsavl_find(avl, guid1, &snapname);
1996	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1997	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1998	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1999	if (guid1hdl == NULL)
2000		return (-1);
2001
2002	nvfs = fsavl_find(avl, guid2, &snapname);
2003	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2004	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2005	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2006	if (guid2hdl == NULL) {
2007		zfs_close(guid1hdl);
2008		return (-1);
2009	}
2010
2011	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2012	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2013
2014	if (create1 < create2)
2015		rv = -1;
2016	else if (create1 > create2)
2017		rv = +1;
2018	else
2019		rv = 0;
2020
2021	zfs_close(guid1hdl);
2022	zfs_close(guid2hdl);
2023
2024	return (rv);
2025}
2026
2027static int
2028recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2029    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2030    nvlist_t *renamed)
2031{
2032	nvlist_t *local_nv, *deleted = NULL;
2033	avl_tree_t *local_avl;
2034	nvpair_t *fselem, *nextfselem;
2035	char *fromsnap;
2036	char newname[ZFS_MAXNAMELEN];
2037	char guidname[32];
2038	int error;
2039	boolean_t needagain, progress, recursive;
2040	char *s1, *s2;
2041
2042	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2043
2044	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2045	    ENOENT);
2046
2047	if (flags->dryrun)
2048		return (0);
2049
2050again:
2051	needagain = progress = B_FALSE;
2052
2053	VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2054
2055	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2056	    recursive, &local_nv, &local_avl)) != 0)
2057		return (error);
2058
2059	/*
2060	 * Process deletes and renames
2061	 */
2062	for (fselem = nvlist_next_nvpair(local_nv, NULL);
2063	    fselem; fselem = nextfselem) {
2064		nvlist_t *nvfs, *snaps;
2065		nvlist_t *stream_nvfs = NULL;
2066		nvpair_t *snapelem, *nextsnapelem;
2067		uint64_t fromguid = 0;
2068		uint64_t originguid = 0;
2069		uint64_t stream_originguid = 0;
2070		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2071		char *fsname, *stream_fsname;
2072
2073		nextfselem = nvlist_next_nvpair(local_nv, fselem);
2074
2075		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2076		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2077		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2078		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2079		    &parent_fromsnap_guid));
2080		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2081
2082		/*
2083		 * First find the stream's fs, so we can check for
2084		 * a different origin (due to "zfs promote")
2085		 */
2086		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2087		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2088			uint64_t thisguid;
2089
2090			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2091			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2092
2093			if (stream_nvfs != NULL)
2094				break;
2095		}
2096
2097		/* check for promote */
2098		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
2099		    &stream_originguid);
2100		if (stream_nvfs && originguid != stream_originguid) {
2101			switch (created_before(hdl, local_avl,
2102			    stream_originguid, originguid)) {
2103			case 1: {
2104				/* promote it! */
2105				zfs_cmd_t zc = { 0 };
2106				nvlist_t *origin_nvfs;
2107				char *origin_fsname;
2108
2109				if (flags->verbose)
2110					(void) printf("promoting %s\n", fsname);
2111
2112				origin_nvfs = fsavl_find(local_avl, originguid,
2113				    NULL);
2114				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2115				    "name", &origin_fsname));
2116				(void) strlcpy(zc.zc_value, origin_fsname,
2117				    sizeof (zc.zc_value));
2118				(void) strlcpy(zc.zc_name, fsname,
2119				    sizeof (zc.zc_name));
2120				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2121				if (error == 0)
2122					progress = B_TRUE;
2123				break;
2124			}
2125			default:
2126				break;
2127			case -1:
2128				fsavl_destroy(local_avl);
2129				nvlist_free(local_nv);
2130				return (-1);
2131			}
2132			/*
2133			 * We had/have the wrong origin, therefore our
2134			 * list of snapshots is wrong.  Need to handle
2135			 * them on the next pass.
2136			 */
2137			needagain = B_TRUE;
2138			continue;
2139		}
2140
2141		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2142		    snapelem; snapelem = nextsnapelem) {
2143			uint64_t thisguid;
2144			char *stream_snapname;
2145			nvlist_t *found, *props;
2146
2147			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2148
2149			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2150			found = fsavl_find(stream_avl, thisguid,
2151			    &stream_snapname);
2152
2153			/* check for delete */
2154			if (found == NULL) {
2155				char name[ZFS_MAXNAMELEN];
2156
2157				if (!flags->force)
2158					continue;
2159
2160				(void) snprintf(name, sizeof (name), "%s@%s",
2161				    fsname, nvpair_name(snapelem));
2162
2163				error = recv_destroy(hdl, name,
2164				    strlen(fsname)+1, newname, flags);
2165				if (error)
2166					needagain = B_TRUE;
2167				else
2168					progress = B_TRUE;
2169				sprintf(guidname, "%lu", thisguid);
2170				nvlist_add_boolean(deleted, guidname);
2171				continue;
2172			}
2173
2174			stream_nvfs = found;
2175
2176			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2177			    &props) && 0 == nvlist_lookup_nvlist(props,
2178			    stream_snapname, &props)) {
2179				zfs_cmd_t zc = { 0 };
2180
2181				zc.zc_cookie = B_TRUE; /* received */
2182				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2183				    "%s@%s", fsname, nvpair_name(snapelem));
2184				if (zcmd_write_src_nvlist(hdl, &zc,
2185				    props) == 0) {
2186					(void) zfs_ioctl(hdl,
2187					    ZFS_IOC_SET_PROP, &zc);
2188					zcmd_free_nvlists(&zc);
2189				}
2190			}
2191
2192			/* check for different snapname */
2193			if (strcmp(nvpair_name(snapelem),
2194			    stream_snapname) != 0) {
2195				char name[ZFS_MAXNAMELEN];
2196				char tryname[ZFS_MAXNAMELEN];
2197
2198				(void) snprintf(name, sizeof (name), "%s@%s",
2199				    fsname, nvpair_name(snapelem));
2200				(void) snprintf(tryname, sizeof (name), "%s@%s",
2201				    fsname, stream_snapname);
2202
2203				error = recv_rename(hdl, name, tryname,
2204				    strlen(fsname)+1, newname, flags);
2205				if (error)
2206					needagain = B_TRUE;
2207				else
2208					progress = B_TRUE;
2209			}
2210
2211			if (strcmp(stream_snapname, fromsnap) == 0)
2212				fromguid = thisguid;
2213		}
2214
2215		/* check for delete */
2216		if (stream_nvfs == NULL) {
2217			if (!flags->force)
2218				continue;
2219
2220			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2221			    newname, flags);
2222			if (error)
2223				needagain = B_TRUE;
2224			else
2225				progress = B_TRUE;
2226			sprintf(guidname, "%lu", parent_fromsnap_guid);
2227			nvlist_add_boolean(deleted, guidname);
2228			continue;
2229		}
2230
2231		if (fromguid == 0) {
2232			if (flags->verbose) {
2233				(void) printf("local fs %s does not have "
2234				    "fromsnap (%s in stream); must have "
2235				    "been deleted locally; ignoring\n",
2236				    fsname, fromsnap);
2237			}
2238			continue;
2239		}
2240
2241		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2242		    "name", &stream_fsname));
2243		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2244		    "parentfromsnap", &stream_parent_fromsnap_guid));
2245
2246		s1 = strrchr(fsname, '/');
2247		s2 = strrchr(stream_fsname, '/');
2248
2249		/*
2250		 * Check if we're going to rename based on parent guid change
2251		 * and the current parent guid was also deleted. If it was then
2252		 * rename will fail and is likely unneeded, so avoid this and
2253		 * force an early retry to determine the new
2254		 * parent_fromsnap_guid.
2255		 */
2256		if (stream_parent_fromsnap_guid != 0 &&
2257                    parent_fromsnap_guid != 0 &&
2258                    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2259			sprintf(guidname, "%lu", parent_fromsnap_guid);
2260			if (nvlist_exists(deleted, guidname)) {
2261				progress = B_TRUE;
2262				needagain = B_TRUE;
2263				goto doagain;
2264			}
2265		}
2266
2267		/*
2268		 * Check for rename. If the exact receive path is specified, it
2269		 * does not count as a rename, but we still need to check the
2270		 * datasets beneath it.
2271		 */
2272		if ((stream_parent_fromsnap_guid != 0 &&
2273		    parent_fromsnap_guid != 0 &&
2274		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2275		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2276		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2277			nvlist_t *parent;
2278			char tryname[ZFS_MAXNAMELEN];
2279
2280			parent = fsavl_find(local_avl,
2281			    stream_parent_fromsnap_guid, NULL);
2282			/*
2283			 * NB: parent might not be found if we used the
2284			 * tosnap for stream_parent_fromsnap_guid,
2285			 * because the parent is a newly-created fs;
2286			 * we'll be able to rename it after we recv the
2287			 * new fs.
2288			 */
2289			if (parent != NULL) {
2290				char *pname;
2291
2292				VERIFY(0 == nvlist_lookup_string(parent, "name",
2293				    &pname));
2294				(void) snprintf(tryname, sizeof (tryname),
2295				    "%s%s", pname, strrchr(stream_fsname, '/'));
2296			} else {
2297				tryname[0] = '\0';
2298				if (flags->verbose) {
2299					(void) printf("local fs %s new parent "
2300					    "not found\n", fsname);
2301				}
2302			}
2303
2304			newname[0] = '\0';
2305
2306			error = recv_rename(hdl, fsname, tryname,
2307			    strlen(tofs)+1, newname, flags);
2308
2309			if (renamed != NULL && newname[0] != '\0') {
2310				VERIFY(0 == nvlist_add_boolean(renamed,
2311				    newname));
2312			}
2313
2314			if (error)
2315				needagain = B_TRUE;
2316			else
2317				progress = B_TRUE;
2318		}
2319	}
2320
2321doagain:
2322	fsavl_destroy(local_avl);
2323	nvlist_free(local_nv);
2324	nvlist_free(deleted);
2325
2326	if (needagain && progress) {
2327		/* do another pass to fix up temporary names */
2328		if (flags->verbose)
2329			(void) printf("another pass:\n");
2330		goto again;
2331	}
2332
2333	return (needagain);
2334}
2335
2336static int
2337zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2338    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2339    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2340{
2341	nvlist_t *stream_nv = NULL;
2342	avl_tree_t *stream_avl = NULL;
2343	char *fromsnap = NULL;
2344	char *cp;
2345	char tofs[ZFS_MAXNAMELEN];
2346	char sendfs[ZFS_MAXNAMELEN];
2347	char errbuf[1024];
2348	dmu_replay_record_t drre;
2349	int error;
2350	boolean_t anyerr = B_FALSE;
2351	boolean_t softerr = B_FALSE;
2352	boolean_t recursive;
2353
2354	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2355	    "cannot receive"));
2356
2357	assert(drr->drr_type == DRR_BEGIN);
2358	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2359	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2360	    DMU_COMPOUNDSTREAM);
2361
2362	/*
2363	 * Read in the nvlist from the stream.
2364	 */
2365	if (drr->drr_payloadlen != 0) {
2366		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2367		    &stream_nv, flags->byteswap, zc);
2368		if (error) {
2369			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2370			goto out;
2371		}
2372	}
2373
2374	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2375	    ENOENT);
2376
2377	if (recursive && strchr(destname, '@')) {
2378		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2379		    "cannot specify snapshot name for multi-snapshot stream"));
2380		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2381		goto out;
2382	}
2383
2384	/*
2385	 * Read in the end record and verify checksum.
2386	 */
2387	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2388	    flags->byteswap, NULL)))
2389		goto out;
2390	if (flags->byteswap) {
2391		drre.drr_type = BSWAP_32(drre.drr_type);
2392		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2393		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2394		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2395		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2396		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2397		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2398		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2399		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2400	}
2401	if (drre.drr_type != DRR_END) {
2402		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2403		goto out;
2404	}
2405	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2406		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2407		    "incorrect header checksum"));
2408		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2409		goto out;
2410	}
2411
2412	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2413
2414	if (drr->drr_payloadlen != 0) {
2415		nvlist_t *stream_fss;
2416
2417		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2418		    &stream_fss));
2419		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2420			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2421			    "couldn't allocate avl tree"));
2422			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2423			goto out;
2424		}
2425
2426		if (fromsnap != NULL) {
2427			nvlist_t *renamed = NULL;
2428			nvpair_t *pair = NULL;
2429
2430			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2431			if (flags->isprefix) {
2432				struct drr_begin *drrb = &drr->drr_u.drr_begin;
2433				int i;
2434
2435				if (flags->istail) {
2436					cp = strrchr(drrb->drr_toname, '/');
2437					if (cp == NULL) {
2438						(void) strlcat(tofs, "/",
2439						    ZFS_MAXNAMELEN);
2440						i = 0;
2441					} else {
2442						i = (cp - drrb->drr_toname);
2443					}
2444				} else {
2445					i = strcspn(drrb->drr_toname, "/@");
2446				}
2447				/* zfs_receive_one() will create_parents() */
2448				(void) strlcat(tofs, &drrb->drr_toname[i],
2449				    ZFS_MAXNAMELEN);
2450				*strchr(tofs, '@') = '\0';
2451			}
2452
2453			if (recursive && !flags->dryrun && !flags->nomount) {
2454				VERIFY(0 == nvlist_alloc(&renamed,
2455				    NV_UNIQUE_NAME, 0));
2456			}
2457
2458			softerr = recv_incremental_replication(hdl, tofs, flags,
2459			    stream_nv, stream_avl, renamed);
2460
2461			/* Unmount renamed filesystems before receiving. */
2462			while ((pair = nvlist_next_nvpair(renamed,
2463			    pair)) != NULL) {
2464				zfs_handle_t *zhp;
2465				prop_changelist_t *clp = NULL;
2466
2467				zhp = zfs_open(hdl, nvpair_name(pair),
2468				    ZFS_TYPE_FILESYSTEM);
2469				if (zhp != NULL) {
2470					clp = changelist_gather(zhp,
2471					    ZFS_PROP_MOUNTPOINT, 0, 0);
2472					zfs_close(zhp);
2473					if (clp != NULL) {
2474						softerr |=
2475						    changelist_prefix(clp);
2476						changelist_free(clp);
2477					}
2478				}
2479			}
2480
2481			nvlist_free(renamed);
2482		}
2483	}
2484
2485	/*
2486	 * Get the fs specified by the first path in the stream (the top level
2487	 * specified by 'zfs send') and pass it to each invocation of
2488	 * zfs_receive_one().
2489	 */
2490	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2491	    ZFS_MAXNAMELEN);
2492	if ((cp = strchr(sendfs, '@')) != NULL)
2493		*cp = '\0';
2494
2495	/* Finally, receive each contained stream */
2496	do {
2497		/*
2498		 * we should figure out if it has a recoverable
2499		 * error, in which case do a recv_skip() and drive on.
2500		 * Note, if we fail due to already having this guid,
2501		 * zfs_receive_one() will take care of it (ie,
2502		 * recv_skip() and return 0).
2503		 */
2504		error = zfs_receive_impl(hdl, destname, flags, fd,
2505		    sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2506		    action_handlep);
2507		if (error == ENODATA) {
2508			error = 0;
2509			break;
2510		}
2511		anyerr |= error;
2512	} while (error == 0);
2513
2514	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2515		/*
2516		 * Now that we have the fs's they sent us, try the
2517		 * renames again.
2518		 */
2519		softerr = recv_incremental_replication(hdl, tofs, flags,
2520		    stream_nv, stream_avl, NULL);
2521	}
2522
2523out:
2524	fsavl_destroy(stream_avl);
2525	if (stream_nv)
2526		nvlist_free(stream_nv);
2527	if (softerr)
2528		error = -2;
2529	if (anyerr)
2530		error = -1;
2531	return (error);
2532}
2533
2534static void
2535trunc_prop_errs(int truncated)
2536{
2537	ASSERT(truncated != 0);
2538
2539	if (truncated == 1)
2540		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2541		    "1 more property could not be set\n"));
2542	else
2543		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2544		    "%d more properties could not be set\n"), truncated);
2545}
2546
2547static int
2548recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2549{
2550	dmu_replay_record_t *drr;
2551	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
2552	char errbuf[1024];
2553
2554	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2555	    "cannot receive:"));
2556
2557	/* XXX would be great to use lseek if possible... */
2558	drr = buf;
2559
2560	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2561	    byteswap, NULL) == 0) {
2562		if (byteswap)
2563			drr->drr_type = BSWAP_32(drr->drr_type);
2564
2565		switch (drr->drr_type) {
2566		case DRR_BEGIN:
2567			/* NB: not to be used on v2 stream packages */
2568			if (drr->drr_payloadlen != 0) {
2569				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2570				    "invalid substream header"));
2571				return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2572			}
2573			break;
2574
2575		case DRR_END:
2576			free(buf);
2577			return (0);
2578
2579		case DRR_OBJECT:
2580			if (byteswap) {
2581				drr->drr_u.drr_object.drr_bonuslen =
2582				    BSWAP_32(drr->drr_u.drr_object.
2583				    drr_bonuslen);
2584			}
2585			(void) recv_read(hdl, fd, buf,
2586			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2587			    B_FALSE, NULL);
2588			break;
2589
2590		case DRR_WRITE:
2591			if (byteswap) {
2592				drr->drr_u.drr_write.drr_length =
2593				    BSWAP_64(drr->drr_u.drr_write.drr_length);
2594			}
2595			(void) recv_read(hdl, fd, buf,
2596			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2597			break;
2598		case DRR_SPILL:
2599			if (byteswap) {
2600				drr->drr_u.drr_write.drr_length =
2601				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
2602			}
2603			(void) recv_read(hdl, fd, buf,
2604			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2605			break;
2606		case DRR_WRITE_EMBEDDED:
2607			if (byteswap) {
2608				drr->drr_u.drr_write_embedded.drr_psize =
2609				    BSWAP_32(drr->drr_u.drr_write_embedded.
2610				    drr_psize);
2611			}
2612			(void) recv_read(hdl, fd, buf,
2613			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
2614			    8), B_FALSE, NULL);
2615			break;
2616		case DRR_WRITE_BYREF:
2617		case DRR_FREEOBJECTS:
2618		case DRR_FREE:
2619			break;
2620
2621		default:
2622			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2623			    "invalid record type"));
2624			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2625		}
2626	}
2627
2628	free(buf);
2629	return (-1);
2630}
2631
2632/*
2633 * Restores a backup of tosnap from the file descriptor specified by infd.
2634 */
2635static int
2636zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2637    recvflags_t *flags, dmu_replay_record_t *drr,
2638    dmu_replay_record_t *drr_noswap, const char *sendfs,
2639    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2640    uint64_t *action_handlep)
2641{
2642	zfs_cmd_t zc = { 0 };
2643	time_t begin_time;
2644	int ioctl_err, ioctl_errno, err;
2645	char *cp;
2646	struct drr_begin *drrb = &drr->drr_u.drr_begin;
2647	char errbuf[1024];
2648	char prop_errbuf[1024];
2649	const char *chopprefix;
2650	boolean_t newfs = B_FALSE;
2651	boolean_t stream_wantsnewfs;
2652	uint64_t parent_snapguid = 0;
2653	prop_changelist_t *clp = NULL;
2654	nvlist_t *snapprops_nvlist = NULL;
2655	zprop_errflags_t prop_errflags;
2656	boolean_t recursive;
2657
2658	begin_time = time(NULL);
2659
2660	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2661	    "cannot receive"));
2662
2663	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2664	    ENOENT);
2665
2666	if (stream_avl != NULL) {
2667		char *snapname;
2668		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2669		    &snapname);
2670		nvlist_t *props;
2671		int ret;
2672
2673		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
2674		    &parent_snapguid);
2675		err = nvlist_lookup_nvlist(fs, "props", &props);
2676		if (err)
2677			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2678
2679		if (flags->canmountoff) {
2680			VERIFY(0 == nvlist_add_uint64(props,
2681			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2682		}
2683		ret = zcmd_write_src_nvlist(hdl, &zc, props);
2684		if (err)
2685			nvlist_free(props);
2686
2687		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2688			VERIFY(0 == nvlist_lookup_nvlist(props,
2689			    snapname, &snapprops_nvlist));
2690		}
2691
2692		if (ret != 0)
2693			return (-1);
2694	}
2695
2696	cp = NULL;
2697
2698	/*
2699	 * Determine how much of the snapshot name stored in the stream
2700	 * we are going to tack on to the name they specified on the
2701	 * command line, and how much we are going to chop off.
2702	 *
2703	 * If they specified a snapshot, chop the entire name stored in
2704	 * the stream.
2705	 */
2706	if (flags->istail) {
2707		/*
2708		 * A filesystem was specified with -e. We want to tack on only
2709		 * the tail of the sent snapshot path.
2710		 */
2711		if (strchr(tosnap, '@')) {
2712			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2713			    "argument - snapshot not allowed with -e"));
2714			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2715		}
2716
2717		chopprefix = strrchr(sendfs, '/');
2718
2719		if (chopprefix == NULL) {
2720			/*
2721			 * The tail is the poolname, so we need to
2722			 * prepend a path separator.
2723			 */
2724			int len = strlen(drrb->drr_toname);
2725			cp = malloc(len + 2);
2726			cp[0] = '/';
2727			(void) strcpy(&cp[1], drrb->drr_toname);
2728			chopprefix = cp;
2729		} else {
2730			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2731		}
2732	} else if (flags->isprefix) {
2733		/*
2734		 * A filesystem was specified with -d. We want to tack on
2735		 * everything but the first element of the sent snapshot path
2736		 * (all but the pool name).
2737		 */
2738		if (strchr(tosnap, '@')) {
2739			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2740			    "argument - snapshot not allowed with -d"));
2741			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2742		}
2743
2744		chopprefix = strchr(drrb->drr_toname, '/');
2745		if (chopprefix == NULL)
2746			chopprefix = strchr(drrb->drr_toname, '@');
2747	} else if (strchr(tosnap, '@') == NULL) {
2748		/*
2749		 * If a filesystem was specified without -d or -e, we want to
2750		 * tack on everything after the fs specified by 'zfs send'.
2751		 */
2752		chopprefix = drrb->drr_toname + strlen(sendfs);
2753	} else {
2754		/* A snapshot was specified as an exact path (no -d or -e). */
2755		if (recursive) {
2756			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2757			    "cannot specify snapshot name for multi-snapshot "
2758			    "stream"));
2759			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2760		}
2761		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2762	}
2763
2764	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2765	ASSERT(chopprefix > drrb->drr_toname);
2766	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2767	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2768	    chopprefix[0] == '\0');
2769
2770	/*
2771	 * Determine name of destination snapshot, store in zc_value.
2772	 */
2773	(void) strcpy(zc.zc_value, tosnap);
2774	(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2775#ifdef __FreeBSD__
2776	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
2777		zfs_ioctl_version = get_zfs_ioctl_version();
2778	/*
2779	 * For forward compatibility hide tosnap in zc_value
2780	 */
2781	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
2782		(void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
2783#endif
2784	free(cp);
2785	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2786		zcmd_free_nvlists(&zc);
2787		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2788	}
2789
2790	/*
2791	 * Determine the name of the origin snapshot, store in zc_string.
2792	 */
2793	if (drrb->drr_flags & DRR_FLAG_CLONE) {
2794		if (guid_to_name(hdl, zc.zc_value,
2795		    drrb->drr_fromguid, zc.zc_string) != 0) {
2796			zcmd_free_nvlists(&zc);
2797			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2798			    "local origin for clone %s does not exist"),
2799			    zc.zc_value);
2800			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2801		}
2802		if (flags->verbose)
2803			(void) printf("found clone origin %s\n", zc.zc_string);
2804	}
2805
2806	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
2807	    (drrb->drr_flags & DRR_FLAG_CLONE));
2808
2809	if (stream_wantsnewfs) {
2810		/*
2811		 * if the parent fs does not exist, look for it based on
2812		 * the parent snap GUID
2813		 */
2814		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2815		    "cannot receive new filesystem stream"));
2816
2817		(void) strcpy(zc.zc_name, zc.zc_value);
2818		cp = strrchr(zc.zc_name, '/');
2819		if (cp)
2820			*cp = '\0';
2821		if (cp &&
2822		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2823			char suffix[ZFS_MAXNAMELEN];
2824			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2825			if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
2826			    zc.zc_value) == 0) {
2827				*strchr(zc.zc_value, '@') = '\0';
2828				(void) strcat(zc.zc_value, suffix);
2829			}
2830		}
2831	} else {
2832		/*
2833		 * if the fs does not exist, look for it based on the
2834		 * fromsnap GUID
2835		 */
2836		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2837		    "cannot receive incremental stream"));
2838
2839		(void) strcpy(zc.zc_name, zc.zc_value);
2840		*strchr(zc.zc_name, '@') = '\0';
2841
2842		/*
2843		 * If the exact receive path was specified and this is the
2844		 * topmost path in the stream, then if the fs does not exist we
2845		 * should look no further.
2846		 */
2847		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
2848		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2849		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2850			char snap[ZFS_MAXNAMELEN];
2851			(void) strcpy(snap, strchr(zc.zc_value, '@'));
2852			if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
2853			    zc.zc_value) == 0) {
2854				*strchr(zc.zc_value, '@') = '\0';
2855				(void) strcat(zc.zc_value, snap);
2856			}
2857		}
2858	}
2859
2860	(void) strcpy(zc.zc_name, zc.zc_value);
2861	*strchr(zc.zc_name, '@') = '\0';
2862
2863	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2864		zfs_handle_t *zhp;
2865
2866		/*
2867		 * Destination fs exists.  Therefore this should either
2868		 * be an incremental, or the stream specifies a new fs
2869		 * (full stream or clone) and they want us to blow it
2870		 * away (and have therefore specified -F and removed any
2871		 * snapshots).
2872		 */
2873		if (stream_wantsnewfs) {
2874			if (!flags->force) {
2875				zcmd_free_nvlists(&zc);
2876				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2877				    "destination '%s' exists\n"
2878				    "must specify -F to overwrite it"),
2879				    zc.zc_name);
2880				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2881			}
2882			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2883			    &zc) == 0) {
2884				zcmd_free_nvlists(&zc);
2885				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2886				    "destination has snapshots (eg. %s)\n"
2887				    "must destroy them to overwrite it"),
2888				    zc.zc_name);
2889				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2890			}
2891		}
2892
2893		if ((zhp = zfs_open(hdl, zc.zc_name,
2894		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2895			zcmd_free_nvlists(&zc);
2896			return (-1);
2897		}
2898
2899		if (stream_wantsnewfs &&
2900		    zhp->zfs_dmustats.dds_origin[0]) {
2901			zcmd_free_nvlists(&zc);
2902			zfs_close(zhp);
2903			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2904			    "destination '%s' is a clone\n"
2905			    "must destroy it to overwrite it"),
2906			    zc.zc_name);
2907			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2908		}
2909
2910		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2911		    stream_wantsnewfs) {
2912			/* We can't do online recv in this case */
2913			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2914			if (clp == NULL) {
2915				zfs_close(zhp);
2916				zcmd_free_nvlists(&zc);
2917				return (-1);
2918			}
2919			if (changelist_prefix(clp) != 0) {
2920				changelist_free(clp);
2921				zfs_close(zhp);
2922				zcmd_free_nvlists(&zc);
2923				return (-1);
2924			}
2925		}
2926		zfs_close(zhp);
2927	} else {
2928		/*
2929		 * Destination filesystem does not exist.  Therefore we better
2930		 * be creating a new filesystem (either from a full backup, or
2931		 * a clone).  It would therefore be invalid if the user
2932		 * specified only the pool name (i.e. if the destination name
2933		 * contained no slash character).
2934		 */
2935		if (!stream_wantsnewfs ||
2936		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
2937			zcmd_free_nvlists(&zc);
2938			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2939			    "destination '%s' does not exist"), zc.zc_name);
2940			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2941		}
2942
2943		/*
2944		 * Trim off the final dataset component so we perform the
2945		 * recvbackup ioctl to the filesystems's parent.
2946		 */
2947		*cp = '\0';
2948
2949		if (flags->isprefix && !flags->istail && !flags->dryrun &&
2950		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2951			zcmd_free_nvlists(&zc);
2952			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2953		}
2954
2955		newfs = B_TRUE;
2956	}
2957
2958	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2959	zc.zc_cookie = infd;
2960	zc.zc_guid = flags->force;
2961	if (flags->verbose) {
2962		(void) printf("%s %s stream of %s into %s\n",
2963		    flags->dryrun ? "would receive" : "receiving",
2964		    drrb->drr_fromguid ? "incremental" : "full",
2965		    drrb->drr_toname, zc.zc_value);
2966		(void) fflush(stdout);
2967	}
2968
2969	if (flags->dryrun) {
2970		zcmd_free_nvlists(&zc);
2971		return (recv_skip(hdl, infd, flags->byteswap));
2972	}
2973
2974	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2975	zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2976	zc.zc_cleanup_fd = cleanup_fd;
2977	zc.zc_action_handle = *action_handlep;
2978
2979	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2980	ioctl_errno = errno;
2981	prop_errflags = (zprop_errflags_t)zc.zc_obj;
2982
2983	if (err == 0) {
2984		nvlist_t *prop_errors;
2985		VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2986		    zc.zc_nvlist_dst_size, &prop_errors, 0));
2987
2988		nvpair_t *prop_err = NULL;
2989
2990		while ((prop_err = nvlist_next_nvpair(prop_errors,
2991		    prop_err)) != NULL) {
2992			char tbuf[1024];
2993			zfs_prop_t prop;
2994			int intval;
2995
2996			prop = zfs_name_to_prop(nvpair_name(prop_err));
2997			(void) nvpair_value_int32(prop_err, &intval);
2998			if (strcmp(nvpair_name(prop_err),
2999			    ZPROP_N_MORE_ERRORS) == 0) {
3000				trunc_prop_errs(intval);
3001				break;
3002			} else {
3003				(void) snprintf(tbuf, sizeof (tbuf),
3004				    dgettext(TEXT_DOMAIN,
3005				    "cannot receive %s property on %s"),
3006				    nvpair_name(prop_err), zc.zc_name);
3007				zfs_setprop_error(hdl, prop, intval, tbuf);
3008			}
3009		}
3010		nvlist_free(prop_errors);
3011	}
3012
3013	zc.zc_nvlist_dst = 0;
3014	zc.zc_nvlist_dst_size = 0;
3015	zcmd_free_nvlists(&zc);
3016
3017	if (err == 0 && snapprops_nvlist) {
3018		zfs_cmd_t zc2 = { 0 };
3019
3020		(void) strcpy(zc2.zc_name, zc.zc_value);
3021		zc2.zc_cookie = B_TRUE; /* received */
3022		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
3023			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
3024			zcmd_free_nvlists(&zc2);
3025		}
3026	}
3027
3028	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3029		/*
3030		 * It may be that this snapshot already exists,
3031		 * in which case we want to consume & ignore it
3032		 * rather than failing.
3033		 */
3034		avl_tree_t *local_avl;
3035		nvlist_t *local_nv, *fs;
3036		cp = strchr(zc.zc_value, '@');
3037
3038		/*
3039		 * XXX Do this faster by just iterating over snaps in
3040		 * this fs.  Also if zc_value does not exist, we will
3041		 * get a strange "does not exist" error message.
3042		 */
3043		*cp = '\0';
3044		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
3045		    &local_nv, &local_avl) == 0) {
3046			*cp = '@';
3047			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3048			fsavl_destroy(local_avl);
3049			nvlist_free(local_nv);
3050
3051			if (fs != NULL) {
3052				if (flags->verbose) {
3053					(void) printf("snap %s already exists; "
3054					    "ignoring\n", zc.zc_value);
3055				}
3056				err = ioctl_err = recv_skip(hdl, infd,
3057				    flags->byteswap);
3058			}
3059		}
3060		*cp = '@';
3061	}
3062
3063	if (ioctl_err != 0) {
3064		switch (ioctl_errno) {
3065		case ENODEV:
3066			cp = strchr(zc.zc_value, '@');
3067			*cp = '\0';
3068			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3069			    "most recent snapshot of %s does not\n"
3070			    "match incremental source"), zc.zc_value);
3071			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3072			*cp = '@';
3073			break;
3074		case ETXTBSY:
3075			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3076			    "destination %s has been modified\n"
3077			    "since most recent snapshot"), zc.zc_name);
3078			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3079			break;
3080		case EEXIST:
3081			cp = strchr(zc.zc_value, '@');
3082			if (newfs) {
3083				/* it's the containing fs that exists */
3084				*cp = '\0';
3085			}
3086			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3087			    "destination already exists"));
3088			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
3089			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3090			    zc.zc_value);
3091			*cp = '@';
3092			break;
3093		case EINVAL:
3094			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3095			break;
3096		case ECKSUM:
3097			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3098			    "invalid stream (checksum mismatch)"));
3099			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3100			break;
3101		case ENOTSUP:
3102			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3103			    "pool must be upgraded to receive this stream."));
3104			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3105			break;
3106		case EDQUOT:
3107			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3108			    "destination %s space quota exceeded"), zc.zc_name);
3109			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3110			break;
3111		default:
3112			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3113		}
3114	}
3115
3116	/*
3117	 * Mount the target filesystem (if created).  Also mount any
3118	 * children of the target filesystem if we did a replication
3119	 * receive (indicated by stream_avl being non-NULL).
3120	 */
3121	cp = strchr(zc.zc_value, '@');
3122	if (cp && (ioctl_err == 0 || !newfs)) {
3123		zfs_handle_t *h;
3124
3125		*cp = '\0';
3126		h = zfs_open(hdl, zc.zc_value,
3127		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3128		if (h != NULL) {
3129			if (h->zfs_type == ZFS_TYPE_VOLUME) {
3130				*cp = '@';
3131			} else if (newfs || stream_avl) {
3132				/*
3133				 * Track the first/top of hierarchy fs,
3134				 * for mounting and sharing later.
3135				 */
3136				if (top_zfs && *top_zfs == NULL)
3137					*top_zfs = zfs_strdup(hdl, zc.zc_value);
3138			}
3139			zfs_close(h);
3140		}
3141		*cp = '@';
3142	}
3143
3144	if (clp) {
3145		err |= changelist_postfix(clp);
3146		changelist_free(clp);
3147	}
3148
3149	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3150		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3151		    "failed to clear unreceived properties on %s"),
3152		    zc.zc_name);
3153		(void) fprintf(stderr, "\n");
3154	}
3155	if (prop_errflags & ZPROP_ERR_NORESTORE) {
3156		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3157		    "failed to restore original properties on %s"),
3158		    zc.zc_name);
3159		(void) fprintf(stderr, "\n");
3160	}
3161
3162	if (err || ioctl_err)
3163		return (-1);
3164
3165	*action_handlep = zc.zc_action_handle;
3166
3167	if (flags->verbose) {
3168		char buf1[64];
3169		char buf2[64];
3170		uint64_t bytes = zc.zc_cookie;
3171		time_t delta = time(NULL) - begin_time;
3172		if (delta == 0)
3173			delta = 1;
3174		zfs_nicenum(bytes, buf1, sizeof (buf1));
3175		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3176
3177		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3178		    buf1, delta, buf2);
3179	}
3180
3181	return (0);
3182}
3183
3184static int
3185zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3186    int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3187    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
3188{
3189	int err;
3190	dmu_replay_record_t drr, drr_noswap;
3191	struct drr_begin *drrb = &drr.drr_u.drr_begin;
3192	char errbuf[1024];
3193	zio_cksum_t zcksum = { 0 };
3194	uint64_t featureflags;
3195	int hdrtype;
3196
3197	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3198	    "cannot receive"));
3199
3200	if (flags->isprefix &&
3201	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3202		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3203		    "(%s) does not exist"), tosnap);
3204		return (zfs_error(hdl, EZFS_NOENT, errbuf));
3205	}
3206
3207	/* read in the BEGIN record */
3208	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3209	    &zcksum)))
3210		return (err);
3211
3212	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3213		/* It's the double end record at the end of a package */
3214		return (ENODATA);
3215	}
3216
3217	/* the kernel needs the non-byteswapped begin record */
3218	drr_noswap = drr;
3219
3220	flags->byteswap = B_FALSE;
3221	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3222		/*
3223		 * We computed the checksum in the wrong byteorder in
3224		 * recv_read() above; do it again correctly.
3225		 */
3226		bzero(&zcksum, sizeof (zio_cksum_t));
3227		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3228		flags->byteswap = B_TRUE;
3229
3230		drr.drr_type = BSWAP_32(drr.drr_type);
3231		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3232		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3233		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3234		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3235		drrb->drr_type = BSWAP_32(drrb->drr_type);
3236		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3237		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3238		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3239	}
3240
3241	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3242		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3243		    "stream (bad magic number)"));
3244		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3245	}
3246
3247	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3248	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3249
3250	if (!DMU_STREAM_SUPPORTED(featureflags) ||
3251	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3252		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3253		    "stream has unsupported feature, feature flags = %lx"),
3254		    featureflags);
3255		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3256	}
3257
3258	if (strchr(drrb->drr_toname, '@') == NULL) {
3259		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3260		    "stream (bad snapshot name)"));
3261		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3262	}
3263
3264	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3265		char nonpackage_sendfs[ZFS_MAXNAMELEN];
3266		if (sendfs == NULL) {
3267			/*
3268			 * We were not called from zfs_receive_package(). Get
3269			 * the fs specified by 'zfs send'.
3270			 */
3271			char *cp;
3272			(void) strlcpy(nonpackage_sendfs,
3273			    drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
3274			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3275				*cp = '\0';
3276			sendfs = nonpackage_sendfs;
3277		}
3278		return (zfs_receive_one(hdl, infd, tosnap, flags,
3279		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
3280		    top_zfs, cleanup_fd, action_handlep));
3281	} else {
3282		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3283		    DMU_COMPOUNDSTREAM);
3284		return (zfs_receive_package(hdl, infd, tosnap, flags,
3285		    &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
3286	}
3287}
3288
3289/*
3290 * Restores a backup of tosnap from the file descriptor specified by infd.
3291 * Return 0 on total success, -2 if some things couldn't be
3292 * destroyed/renamed/promoted, -1 if some things couldn't be received.
3293 * (-1 will override -2).
3294 */
3295int
3296zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3297    int infd, avl_tree_t *stream_avl)
3298{
3299	char *top_zfs = NULL;
3300	int err;
3301	int cleanup_fd;
3302	uint64_t action_handle = 0;
3303
3304	cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
3305	VERIFY(cleanup_fd >= 0);
3306
3307	err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
3308	    stream_avl, &top_zfs, cleanup_fd, &action_handle);
3309
3310	VERIFY(0 == close(cleanup_fd));
3311
3312	if (err == 0 && !flags->nomount && top_zfs) {
3313		zfs_handle_t *zhp;
3314		prop_changelist_t *clp;
3315
3316		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3317		if (zhp != NULL) {
3318			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3319			    CL_GATHER_MOUNT_ALWAYS, 0);
3320			zfs_close(zhp);
3321			if (clp != NULL) {
3322				/* mount and share received datasets */
3323				err = changelist_postfix(clp);
3324				changelist_free(clp);
3325			}
3326		}
3327		if (zhp == NULL || clp == NULL || err)
3328			err = -1;
3329	}
3330	if (top_zfs)
3331		free(top_zfs);
3332
3333	return (err);
3334}
3335