1185029Spjd/*
2185029Spjd * CDDL HEADER START
3185029Spjd *
4185029Spjd * The contents of this file are subject to the terms of the
5185029Spjd * Common Development and Distribution License (the "License").
6185029Spjd * You may not use this file except in compliance with the License.
7185029Spjd *
8185029Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9185029Spjd * or http://www.opensolaris.org/os/licensing.
10185029Spjd * See the License for the specific language governing permissions
11185029Spjd * and limitations under the License.
12185029Spjd *
13185029Spjd * When distributing Covered Code, include this CDDL HEADER in each
14185029Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15185029Spjd * If applicable, add the following below this CDDL HEADER, with the
16185029Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17185029Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18185029Spjd *
19185029Spjd * CDDL HEADER END
20185029Spjd */
21185029Spjd
22185029Spjd/*
23219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24290756Smav * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25235222Smm * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26307053Smav * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved.
27251646Sdelphij * Copyright (c) 2013 Steven Hartland. All rights reserved.
28297103Smav * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
29297112Smav * Copyright (c) 2014 Integros [integros.com]
30307058Smav * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31185029Spjd */
32185029Spjd
33185029Spjd#include <assert.h>
34185029Spjd#include <ctype.h>
35185029Spjd#include <errno.h>
36185029Spjd#include <libintl.h>
37185029Spjd#include <stdio.h>
38185029Spjd#include <stdlib.h>
39185029Spjd#include <strings.h>
40185029Spjd#include <unistd.h>
41185029Spjd#include <stddef.h>
42185029Spjd#include <fcntl.h>
43186515Srwatson#include <sys/param.h>
44185029Spjd#include <sys/mount.h>
45219089Spjd#include <pthread.h>
46219089Spjd#include <umem.h>
47235222Smm#include <time.h>
48185029Spjd
49185029Spjd#include <libzfs.h>
50268649Sdelphij#include <libzfs_core.h>
51185029Spjd
52185029Spjd#include "zfs_namecheck.h"
53185029Spjd#include "zfs_prop.h"
54219089Spjd#include "zfs_fletcher.h"
55185029Spjd#include "libzfs_impl.h"
56290756Smav#include <zlib.h>
57219089Spjd#include <sha2.h>
58219089Spjd#include <sys/zio_checksum.h>
59219089Spjd#include <sys/ddt.h>
60185029Spjd
61248571Smm#ifdef __FreeBSD__
62248571Smmextern int zfs_ioctl_version;
63248571Smm#endif
64248571Smm
65219089Spjd/* in libzfs_dataset.c */
66219089Spjdextern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
67185029Spjd/* We need to use something for ENODATA. */
68185029Spjd#define	ENODATA	EIDRM
69185029Spjd
70288571Smavstatic int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
71288571Smav    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
72297103Smav    uint64_t *, const char *);
73290756Smavstatic int guid_to_name(libzfs_handle_t *, const char *,
74290756Smav    uint64_t, boolean_t, char *);
75185029Spjd
76219089Spjdstatic const zio_cksum_t zero_cksum = { 0 };
77219089Spjd
/*
 * Argument block passed to the cksummer() thread.
 */
78219089Spjdtypedef struct dedup_arg {
79219089Spjd	int	inputfd;	/* cksummer() fdopen()s this and reads records from it */
80219089Spjd	int	outputfd;	/* cksummer() writes the rewritten records here */
81219089Spjd	libzfs_handle_t  *dedup_hdl;	/* used for zfs_alloc() and ddt error text */
82219089Spjd} dedup_arg_t;
83219089Spjd
/*
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field notes below are inferred from the names only -- confirm
 * against the code that fills it in.
 */
84235222Smmtypedef struct progress_arg {
85235222Smm	zfs_handle_t *pa_zhp;	/* presumably the dataset being sent */
86235222Smm	int pa_fd;		/* presumably the fd the send writes to */
87235222Smm	boolean_t pa_parsable;	/* presumably selects parsable output */
88235222Smm} progress_arg_t;
89235222Smm
/*
 * Location of a data block in the send stream.  cksummer() fills one of
 * these from each DRR_WRITE record and stores it in the dedup table so that
 * later duplicates can refer back to it.
 */
90219089Spjdtypedef struct dataref {
91219089Spjd	uint64_t ref_guid;	/* taken from the write record's drr_toguid */
92219089Spjd	uint64_t ref_object;	/* taken from the write record's drr_object */
93219089Spjd	uint64_t ref_offset;	/* taken from the write record's drr_offset */
94219089Spjd} dataref_t;
95219089Spjd
/*
 * One entry in a dedup table hash chain.  Entries are matched on the
 * (dde_chksum, dde_prop) pair by ddt_update().
 */
96219089Spjdtypedef struct dedup_entry {
97219089Spjd	struct dedup_entry	*dde_next;	/* next entry in the same bucket */
98219089Spjd	zio_cksum_t dde_chksum;	/* checksum of the block */
99219089Spjd	uint64_t dde_prop;	/* ddk_prop value of the write record */
100219089Spjd	dataref_t dde_ref;	/* where the block first appeared */
101219089Spjd} dedup_entry_t;
102219089Spjd
103219089Spjd#define	MAX_DDT_PHYSMEM_PERCENT		20
104219089Spjd#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128
105219089Spjd
/*
 * In-memory dedup table built by cksummer(): an array of hash buckets, each
 * heading a singly linked chain of dedup_entry_t.
 */
106219089Spjdtypedef struct dedup_table {
107219089Spjd	dedup_entry_t	**dedup_hash_array;	/* bucket heads, calloc()ed */
108219089Spjd	umem_cache_t	*ddecache;	/* allocator for dedup_entry_t */
109219089Spjd	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
110219089Spjd	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
111219089Spjd	uint64_t	ddt_count;	/* incremented for each entry added */
112219089Spjd	int		numhashbits;	/* low bits of zc_word[0] used as index */
113219089Spjd	boolean_t	ddt_full;	/* set once the "table full" text is posted */
114219089Spjd} dedup_table_t;
115219089Spjd
/*
 * Return the 1-based position of the most significant set bit of n, i.e.
 * the number of right shifts needed to reduce n to zero.  Returns 0 for
 * n == 0.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		n >>= 1;
		nbits++;
	}

	return (nbits);
}
125219089Spjd
/*
 * Read exactly one item of len bytes from stream into buf.
 *
 * Returns the fread() item count: 1 when the whole item was read, 0 on a
 * short read, end of file or error.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
136219089Spjd
137219089Spjdstatic void
138219089Spjdddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
139219089Spjd    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
140219089Spjd{
141219089Spjd	dedup_entry_t	*dde;
142219089Spjd
143219089Spjd	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
144219089Spjd		if (ddt->ddt_full == B_FALSE) {
145219089Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
146219089Spjd			    "Dedup table full.  Deduplication will continue "
147219089Spjd			    "with existing table entries"));
148219089Spjd			ddt->ddt_full = B_TRUE;
149219089Spjd		}
150219089Spjd		return;
151219089Spjd	}
152219089Spjd
153219089Spjd	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
154219089Spjd	    != NULL) {
155219089Spjd		assert(*ddepp == NULL);
156219089Spjd		dde->dde_next = NULL;
157219089Spjd		dde->dde_chksum = *cs;
158219089Spjd		dde->dde_prop = prop;
159219089Spjd		dde->dde_ref = *dr;
160219089Spjd		*ddepp = dde;
161219089Spjd		ddt->cur_ddt_size += sizeof (dedup_entry_t);
162219089Spjd		ddt->ddt_count++;
163219089Spjd	}
164219089Spjd}
165219089Spjd
166185029Spjd/*
167219089Spjd * Using the specified dedup table, do a lookup for an entry with
168219089Spjd * the checksum cs.  If found, return the block's reference info
169219089Spjd * in *dr. Otherwise, insert a new entry in the dedup table, using
170219089Spjd * the reference information specified by *dr.
171219089Spjd *
172219089Spjd * return value:  true - entry was found
173219089Spjd *		  false - entry was not found
174219089Spjd */
175219089Spjdstatic boolean_t
176219089Spjdddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
177219089Spjd    uint64_t prop, dataref_t *dr)
178219089Spjd{
179219089Spjd	uint32_t hashcode;
180219089Spjd	dedup_entry_t **ddepp;
181219089Spjd
182219089Spjd	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
183219089Spjd
184219089Spjd	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
185219089Spjd	    ddepp = &((*ddepp)->dde_next)) {
186219089Spjd		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
187219089Spjd		    (*ddepp)->dde_prop == prop) {
188219089Spjd			*dr = (*ddepp)->dde_ref;
189219089Spjd			return (B_TRUE);
190219089Spjd		}
191219089Spjd	}
192219089Spjd	ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
193219089Spjd	return (B_FALSE);
194219089Spjd}
195219089Spjd
196219089Spjdstatic int
197288553Smavdump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
198288553Smav    zio_cksum_t *zc, int outfd)
199219089Spjd{
200288553Smav	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
201288553Smav	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
202288553Smav	fletcher_4_incremental_native(drr,
203288553Smav	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
204288553Smav	if (drr->drr_type != DRR_BEGIN) {
205288553Smav		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
206288553Smav		    drr_checksum.drr_checksum));
207288553Smav		drr->drr_u.drr_checksum.drr_checksum = *zc;
208288553Smav	}
209288553Smav	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
210288553Smav	    sizeof (zio_cksum_t), zc);
211288553Smav	if (write(outfd, drr, sizeof (*drr)) == -1)
212288553Smav		return (errno);
213288553Smav	if (payload_len != 0) {
214288553Smav		fletcher_4_incremental_native(payload, payload_len, zc);
215288553Smav		if (write(outfd, payload, payload_len) == -1)
216288553Smav			return (errno);
217288553Smav	}
218288553Smav	return (0);
219219089Spjd}
220219089Spjd
221219089Spjd/*
222219089Spjd * This function is started in a separate thread when the dedup option
223219089Spjd * has been requested.  The main send thread determines the list of
224219089Spjd * snapshots to be included in the send stream and makes the ioctl calls
225219089Spjd * for each one.  But instead of having the ioctl send the output to the
226219089Spjd * the output fd specified by the caller of zfs_send()), the
227219089Spjd * ioctl is told to direct the output to a pipe, which is read by the
228219089Spjd * alternate thread running THIS function.  This function does the
229219089Spjd * dedup'ing by:
230219089Spjd *  1. building a dedup table (the DDT)
231219089Spjd *  2. doing checksums on each data block and inserting a record in the DDT
232219089Spjd *  3. looking for matching checksums, and
233219089Spjd *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
234219089Spjd *      a duplicate block is found.
235219089Spjd * The output of this function then goes to the output fd requested
236219089Spjd * by the caller of zfs_send().
237219089Spjd */
238219089Spjdstatic void *
239219089Spjdcksummer(void *arg)
240219089Spjd{
241219089Spjd	dedup_arg_t *dda = arg;
242276081Sdelphij	char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
243219089Spjd	dmu_replay_record_t thedrr;
244219089Spjd	dmu_replay_record_t *drr = &thedrr;
245219089Spjd	FILE *ofp;
246219089Spjd	int outfd;
247219089Spjd	dedup_table_t ddt;
248219089Spjd	zio_cksum_t stream_cksum;
249219089Spjd	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
250219089Spjd	uint64_t numbuckets;
251219089Spjd
252219089Spjd	ddt.max_ddt_size =
253288553Smav	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
254288553Smav	    SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
255219089Spjd
256288553Smav	numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
257219089Spjd
258219089Spjd	/*
259219089Spjd	 * numbuckets must be a power of 2.  Increase number to
260219089Spjd	 * a power of 2 if necessary.
261219089Spjd	 */
262219089Spjd	if (!ISP2(numbuckets))
263219089Spjd		numbuckets = 1 << high_order_bit(numbuckets);
264219089Spjd
265219089Spjd	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
266219089Spjd	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
267219089Spjd	    NULL, NULL, NULL, NULL, NULL, 0);
268219089Spjd	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
269219089Spjd	ddt.numhashbits = high_order_bit(numbuckets) - 1;
270219089Spjd	ddt.ddt_full = B_FALSE;
271219089Spjd
272219089Spjd	outfd = dda->outputfd;
273219089Spjd	ofp = fdopen(dda->inputfd, "r");
274288553Smav	while (ssread(drr, sizeof (*drr), ofp) != 0) {
275219089Spjd
276219089Spjd		switch (drr->drr_type) {
277219089Spjd		case DRR_BEGIN:
278219089Spjd		{
279288553Smav			struct drr_begin *drrb = &drr->drr_u.drr_begin;
280288553Smav			int fflags;
281288553Smav			int sz = 0;
282219089Spjd			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
283219089Spjd
284288553Smav			ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
285288553Smav
286219089Spjd			/* set the DEDUP feature flag for this stream */
287219089Spjd			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
288219089Spjd			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
289219089Spjd			    DMU_BACKUP_FEATURE_DEDUPPROPS);
290219089Spjd			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
291219089Spjd
292290756Smav			if (drr->drr_payloadlen != 0) {
293288553Smav				sz = drr->drr_payloadlen;
294219089Spjd
295276081Sdelphij				if (sz > SPA_MAXBLOCKSIZE) {
296276081Sdelphij					buf = zfs_realloc(dda->dedup_hdl, buf,
297276081Sdelphij					    SPA_MAXBLOCKSIZE, sz);
298219089Spjd				}
299219089Spjd				(void) ssread(buf, sz, ofp);
300219089Spjd				if (ferror(stdin))
301219089Spjd					perror("fread");
302219089Spjd			}
303288553Smav			if (dump_record(drr, buf, sz, &stream_cksum,
304288553Smav			    outfd) != 0)
305288553Smav				goto out;
306219089Spjd			break;
307219089Spjd		}
308219089Spjd
309219089Spjd		case DRR_END:
310219089Spjd		{
311288553Smav			struct drr_end *drre = &drr->drr_u.drr_end;
312219089Spjd			/* use the recalculated checksum */
313288553Smav			drre->drr_checksum = stream_cksum;
314288553Smav			if (dump_record(drr, NULL, 0, &stream_cksum,
315288553Smav			    outfd) != 0)
316219089Spjd				goto out;
317219089Spjd			break;
318219089Spjd		}
319219089Spjd
320219089Spjd		case DRR_OBJECT:
321219089Spjd		{
322288553Smav			struct drr_object *drro = &drr->drr_u.drr_object;
323219089Spjd			if (drro->drr_bonuslen > 0) {
324219089Spjd				(void) ssread(buf,
325219089Spjd				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
326219089Spjd				    ofp);
327219089Spjd			}
328288553Smav			if (dump_record(drr, buf,
329288553Smav			    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
330288553Smav			    &stream_cksum, outfd) != 0)
331288553Smav				goto out;
332219089Spjd			break;
333219089Spjd		}
334219089Spjd
335219089Spjd		case DRR_SPILL:
336219089Spjd		{
337288553Smav			struct drr_spill *drrs = &drr->drr_u.drr_spill;
338219089Spjd			(void) ssread(buf, drrs->drr_length, ofp);
339288553Smav			if (dump_record(drr, buf, drrs->drr_length,
340288553Smav			    &stream_cksum, outfd) != 0)
341219089Spjd				goto out;
342219089Spjd			break;
343219089Spjd		}
344219089Spjd
345219089Spjd		case DRR_FREEOBJECTS:
346219089Spjd		{
347288553Smav			if (dump_record(drr, NULL, 0, &stream_cksum,
348288553Smav			    outfd) != 0)
349219089Spjd				goto out;
350219089Spjd			break;
351219089Spjd		}
352219089Spjd
353219089Spjd		case DRR_WRITE:
354219089Spjd		{
355288553Smav			struct drr_write *drrw = &drr->drr_u.drr_write;
356219089Spjd			dataref_t	dataref;
357219089Spjd
358219089Spjd			(void) ssread(buf, drrw->drr_length, ofp);
359219089Spjd
360219089Spjd			/*
361219089Spjd			 * Use the existing checksum if it's dedup-capable,
362219089Spjd			 * else calculate a SHA256 checksum for it.
363219089Spjd			 */
364219089Spjd
365219089Spjd			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
366219089Spjd			    zero_cksum) ||
367219089Spjd			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
368219089Spjd				SHA256_CTX	ctx;
369219089Spjd				zio_cksum_t	tmpsha256;
370219089Spjd
371219089Spjd				SHA256Init(&ctx);
372219089Spjd				SHA256Update(&ctx, buf, drrw->drr_length);
373219089Spjd				SHA256Final(&tmpsha256, &ctx);
374219089Spjd				drrw->drr_key.ddk_cksum.zc_word[0] =
375219089Spjd				    BE_64(tmpsha256.zc_word[0]);
376219089Spjd				drrw->drr_key.ddk_cksum.zc_word[1] =
377219089Spjd				    BE_64(tmpsha256.zc_word[1]);
378219089Spjd				drrw->drr_key.ddk_cksum.zc_word[2] =
379219089Spjd				    BE_64(tmpsha256.zc_word[2]);
380219089Spjd				drrw->drr_key.ddk_cksum.zc_word[3] =
381219089Spjd				    BE_64(tmpsha256.zc_word[3]);
382219089Spjd				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
383219089Spjd				drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
384219089Spjd			}
385219089Spjd
386219089Spjd			dataref.ref_guid = drrw->drr_toguid;
387219089Spjd			dataref.ref_object = drrw->drr_object;
388219089Spjd			dataref.ref_offset = drrw->drr_offset;
389219089Spjd
390219089Spjd			if (ddt_update(dda->dedup_hdl, &ddt,
391219089Spjd			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
392219089Spjd			    &dataref)) {
393288553Smav				dmu_replay_record_t wbr_drr = {0};
394288553Smav				struct drr_write_byref *wbr_drrr =
395288553Smav				    &wbr_drr.drr_u.drr_write_byref;
396288553Smav
397219089Spjd				/* block already present in stream */
398288553Smav				wbr_drr.drr_type = DRR_WRITE_BYREF;
399288553Smav
400219089Spjd				wbr_drrr->drr_object = drrw->drr_object;
401219089Spjd				wbr_drrr->drr_offset = drrw->drr_offset;
402219089Spjd				wbr_drrr->drr_length = drrw->drr_length;
403219089Spjd				wbr_drrr->drr_toguid = drrw->drr_toguid;
404219089Spjd				wbr_drrr->drr_refguid = dataref.ref_guid;
405219089Spjd				wbr_drrr->drr_refobject =
406219089Spjd				    dataref.ref_object;
407219089Spjd				wbr_drrr->drr_refoffset =
408219089Spjd				    dataref.ref_offset;
409219089Spjd
410219089Spjd				wbr_drrr->drr_checksumtype =
411219089Spjd				    drrw->drr_checksumtype;
412219089Spjd				wbr_drrr->drr_checksumflags =
413219089Spjd				    drrw->drr_checksumtype;
414219089Spjd				wbr_drrr->drr_key.ddk_cksum =
415219089Spjd				    drrw->drr_key.ddk_cksum;
416219089Spjd				wbr_drrr->drr_key.ddk_prop =
417219089Spjd				    drrw->drr_key.ddk_prop;
418219089Spjd
419288553Smav				if (dump_record(&wbr_drr, NULL, 0,
420288553Smav				    &stream_cksum, outfd) != 0)
421219089Spjd					goto out;
422219089Spjd			} else {
423219089Spjd				/* block not previously seen */
424288553Smav				if (dump_record(drr, buf, drrw->drr_length,
425288553Smav				    &stream_cksum, outfd) != 0)
426219089Spjd					goto out;
427219089Spjd			}
428219089Spjd			break;
429219089Spjd		}
430219089Spjd
431268649Sdelphij		case DRR_WRITE_EMBEDDED:
432268649Sdelphij		{
433288553Smav			struct drr_write_embedded *drrwe =
434288553Smav			    &drr->drr_u.drr_write_embedded;
435268649Sdelphij			(void) ssread(buf,
436268649Sdelphij			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
437288553Smav			if (dump_record(drr, buf,
438268649Sdelphij			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
439288553Smav			    &stream_cksum, outfd) != 0)
440268649Sdelphij				goto out;
441268649Sdelphij			break;
442268649Sdelphij		}
443268649Sdelphij
444219089Spjd		case DRR_FREE:
445219089Spjd		{
446288553Smav			if (dump_record(drr, NULL, 0, &stream_cksum,
447288553Smav			    outfd) != 0)
448219089Spjd				goto out;
449219089Spjd			break;
450219089Spjd		}
451219089Spjd
452219089Spjd		default:
453288553Smav			(void) fprintf(stderr, "INVALID record type 0x%x\n",
454219089Spjd			    drr->drr_type);
455219089Spjd			/* should never happen, so assert */
456219089Spjd			assert(B_FALSE);
457219089Spjd		}
458219089Spjd	}
459219089Spjdout:
460219089Spjd	umem_cache_destroy(ddt.ddecache);
461219089Spjd	free(ddt.dedup_hash_array);
462219089Spjd	free(buf);
463219089Spjd	(void) fclose(ofp);
464219089Spjd
465219089Spjd	return (NULL);
466219089Spjd}
467219089Spjd
468219089Spjd/*
469185029Spjd * Routines for dealing with the AVL tree of fs-nvlists
470185029Spjd */
/*
 * Node of the snapshot AVL tree built by fsavl_create(); one per snapshot,
 * ordered by fn_guid (see fsavl_compare()).
 */
471185029Spjdtypedef struct fsavl_node {
472185029Spjd	avl_node_t fn_node;	/* AVL linkage (offset passed to avl_create) */
473185029Spjd	nvlist_t *fn_nvfs;	/* nvlist of the filesystem holding the snapshot */
474185029Spjd	char *fn_snapname;	/* nvpair_name() of the snapshot's "snaps" pair */
475185029Spjd	uint64_t fn_guid;	/* snapshot guid; the sort key */
476185029Spjd} fsavl_node_t;
477185029Spjd
478185029Spjdstatic int
479185029Spjdfsavl_compare(const void *arg1, const void *arg2)
480185029Spjd{
481185029Spjd	const fsavl_node_t *fn1 = arg1;
482185029Spjd	const fsavl_node_t *fn2 = arg2;
483185029Spjd
484185029Spjd	if (fn1->fn_guid > fn2->fn_guid)
485185029Spjd		return (+1);
486185029Spjd	else if (fn1->fn_guid < fn2->fn_guid)
487185029Spjd		return (-1);
488185029Spjd	else
489185029Spjd		return (0);
490185029Spjd}
491185029Spjd
492185029Spjd/*
493185029Spjd * Given the GUID of a snapshot, find its containing filesystem and
494185029Spjd * (optionally) name.
495185029Spjd */
496185029Spjdstatic nvlist_t *
497185029Spjdfsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
498185029Spjd{
499185029Spjd	fsavl_node_t fn_find;
500185029Spjd	fsavl_node_t *fn;
501185029Spjd
502185029Spjd	fn_find.fn_guid = snapguid;
503185029Spjd
504185029Spjd	fn = avl_find(avl, &fn_find, NULL);
505185029Spjd	if (fn) {
506185029Spjd		if (snapname)
507185029Spjd			*snapname = fn->fn_snapname;
508185029Spjd		return (fn->fn_nvfs);
509185029Spjd	}
510185029Spjd	return (NULL);
511185029Spjd}
512185029Spjd
513185029Spjdstatic void
514185029Spjdfsavl_destroy(avl_tree_t *avl)
515185029Spjd{
516185029Spjd	fsavl_node_t *fn;
517185029Spjd	void *cookie;
518185029Spjd
519185029Spjd	if (avl == NULL)
520185029Spjd		return;
521185029Spjd
522185029Spjd	cookie = NULL;
523185029Spjd	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
524185029Spjd		free(fn);
525185029Spjd	avl_destroy(avl);
526185029Spjd	free(avl);
527185029Spjd}
528185029Spjd
529219089Spjd/*
530219089Spjd * Given an nvlist, produce an avl tree of snapshots, ordered by guid
531219089Spjd */
532185029Spjdstatic avl_tree_t *
533185029Spjdfsavl_create(nvlist_t *fss)
534185029Spjd{
535185029Spjd	avl_tree_t *fsavl;
536185029Spjd	nvpair_t *fselem = NULL;
537185029Spjd
538185029Spjd	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
539185029Spjd		return (NULL);
540185029Spjd
541185029Spjd	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
542185029Spjd	    offsetof(fsavl_node_t, fn_node));
543185029Spjd
544185029Spjd	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
545185029Spjd		nvlist_t *nvfs, *snaps;
546185029Spjd		nvpair_t *snapelem = NULL;
547185029Spjd
548185029Spjd		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
549185029Spjd		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
550185029Spjd
551185029Spjd		while ((snapelem =
552185029Spjd		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
553185029Spjd			fsavl_node_t *fn;
554185029Spjd			uint64_t guid;
555185029Spjd
556185029Spjd			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
557185029Spjd			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
558185029Spjd				fsavl_destroy(fsavl);
559185029Spjd				return (NULL);
560185029Spjd			}
561185029Spjd			fn->fn_nvfs = nvfs;
562185029Spjd			fn->fn_snapname = nvpair_name(snapelem);
563185029Spjd			fn->fn_guid = guid;
564185029Spjd
565185029Spjd			/*
566185029Spjd			 * Note: if there are multiple snaps with the
567185029Spjd			 * same GUID, we ignore all but one.
568185029Spjd			 */
569185029Spjd			if (avl_find(fsavl, fn, NULL) == NULL)
570185029Spjd				avl_add(fsavl, fn);
571185029Spjd			else
572185029Spjd				free(fn);
573185029Spjd		}
574185029Spjd	}
575185029Spjd
576185029Spjd	return (fsavl);
577185029Spjd}
578185029Spjd
579185029Spjd/*
580185029Spjd * Routines for dealing with the giant nvlist of fs-nvlists, etc.
581185029Spjd */
/*
 * State shared by send_iterate_fs() and send_iterate_snap() while the
 * dataset hierarchy is walked.
 */
582185029Spjdtypedef struct send_data {
583307117Smav	/*
584307117Smav	 * assigned inside every recursive call,
585307117Smav	 * restored from *_save on return:
586307117Smav	 *
587307117Smav	 * guid of fromsnap snapshot in parent dataset
588307117Smav	 * txg of fromsnap snapshot in current dataset
589307117Smav	 * txg of tosnap snapshot in current dataset
590307117Smav	 */
591307117Smav
592185029Spjd	uint64_t parent_fromsnap_guid;
593307117Smav	uint64_t fromsnap_txg;
594307117Smav	uint64_t tosnap_txg;
595307117Smav
596307117Smav	/* the nvlists get accumulated during depth-first traversal */
597185029Spjd	nvlist_t *parent_snaps;	/* snapshot name -> guid, current dataset */
598185029Spjd	nvlist_t *fss;
599185029Spjd	nvlist_t *snapprops;	/* snapshot name -> nvlist of its properties */
600307117Smav
601307117Smav	/* send-receive configuration, does not change during traversal */
602307117Smav	const char *fsname;	/* used in the "cannot send" error message */
603185029Spjd	const char *fromsnap;	/* short snapshot name; may be NULL */
604185029Spjd	const char *tosnap;	/* short snapshot name; may be NULL */
605219089Spjd	boolean_t recursive;	/* only affects error message wording here */
606307117Smav	boolean_t verbose;	/* print "skipping ..." notices to stderr */
607185029Spjd
608185029Spjd	/*
609185029Spjd	 * The header nvlist is of the following format:
610185029Spjd	 * {
611185029Spjd	 *   "tosnap" -> string
612185029Spjd	 *   "fromsnap" -> string (if incremental)
613185029Spjd	 *   "fss" -> {
614185029Spjd	 *	id -> {
615185029Spjd	 *
616185029Spjd	 *	 "name" -> string (full name; for debugging)
617185029Spjd	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
618185029Spjd	 *
619185029Spjd	 *	 "props" -> { name -> value (only if set here) }
620185029Spjd	 *	 "snaps" -> { name (lastname) -> number (guid) }
621185029Spjd	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
622185029Spjd	 *
623185029Spjd	 *	 "origin" -> number (guid) (if clone)
624185029Spjd	 *	 "sent" -> boolean (not on-disk)
625185029Spjd	 *	}
626185029Spjd	 *   }
627185029Spjd	 * }
628185029Spjd	 *
629185029Spjd	 */
630185029Spjd} send_data_t;
631185029Spjd
632185029Spjdstatic void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
633185029Spjd
634185029Spjdstatic int
635185029Spjdsend_iterate_snap(zfs_handle_t *zhp, void *arg)
636185029Spjd{
637185029Spjd	send_data_t *sd = arg;
638185029Spjd	uint64_t guid = zhp->zfs_dmustats.dds_guid;
639307117Smav	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
640185029Spjd	char *snapname;
641185029Spjd	nvlist_t *nv;
642185029Spjd
643185029Spjd	snapname = strrchr(zhp->zfs_name, '@')+1;
644185029Spjd
645307117Smav	if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
646307117Smav		if (sd->verbose) {
647307117Smav			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
648307117Smav			    "skipping snapshot %s because it was created "
649307117Smav			    "after the destination snapshot (%s)\n"),
650307117Smav			    zhp->zfs_name, sd->tosnap);
651307117Smav		}
652307117Smav		zfs_close(zhp);
653307117Smav		return (0);
654307117Smav	}
655307117Smav
656185029Spjd	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
657185029Spjd	/*
658185029Spjd	 * NB: if there is no fromsnap here (it's a newly created fs in
659185029Spjd	 * an incremental replication), we will substitute the tosnap.
660185029Spjd	 */
661185029Spjd	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
662185029Spjd	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
663185029Spjd	    strcmp(snapname, sd->tosnap) == 0)) {
664185029Spjd		sd->parent_fromsnap_guid = guid;
665185029Spjd	}
666185029Spjd
667185029Spjd	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
668185029Spjd	send_iterate_prop(zhp, nv);
669185029Spjd	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
670185029Spjd	nvlist_free(nv);
671185029Spjd
672185029Spjd	zfs_close(zhp);
673185029Spjd	return (0);
674185029Spjd}
675185029Spjd
676185029Spjdstatic void
677185029Spjdsend_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
678185029Spjd{
679185029Spjd	nvpair_t *elem = NULL;
680185029Spjd
681185029Spjd	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
682185029Spjd		char *propname = nvpair_name(elem);
683185029Spjd		zfs_prop_t prop = zfs_name_to_prop(propname);
684185029Spjd		nvlist_t *propnv;
685185029Spjd
686219089Spjd		if (!zfs_prop_user(propname)) {
687219089Spjd			/*
688219089Spjd			 * Realistically, this should never happen.  However,
689219089Spjd			 * we want the ability to add DSL properties without
690219089Spjd			 * needing to make incompatible version changes.  We
691219089Spjd			 * need to ignore unknown properties to allow older
692219089Spjd			 * software to still send datasets containing these
693219089Spjd			 * properties, with the unknown properties elided.
694219089Spjd			 */
695219089Spjd			if (prop == ZPROP_INVAL)
696219089Spjd				continue;
697209962Smm
698219089Spjd			if (zfs_prop_readonly(prop))
699219089Spjd				continue;
700219089Spjd		}
701185029Spjd
702185029Spjd		verify(nvpair_value_nvlist(elem, &propnv) == 0);
703219089Spjd		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
704219089Spjd		    prop == ZFS_PROP_REFQUOTA ||
705219089Spjd		    prop == ZFS_PROP_REFRESERVATION) {
706219089Spjd			char *source;
707185029Spjd			uint64_t value;
708185029Spjd			verify(nvlist_lookup_uint64(propnv,
709185029Spjd			    ZPROP_VALUE, &value) == 0);
710185029Spjd			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
711185029Spjd				continue;
712219089Spjd			/*
713219089Spjd			 * May have no source before SPA_VERSION_RECVD_PROPS,
714219089Spjd			 * but is still modifiable.
715219089Spjd			 */
716219089Spjd			if (nvlist_lookup_string(propnv,
717219089Spjd			    ZPROP_SOURCE, &source) == 0) {
718219089Spjd				if ((strcmp(source, zhp->zfs_name) != 0) &&
719219089Spjd				    (strcmp(source,
720219089Spjd				    ZPROP_SOURCE_VAL_RECVD) != 0))
721219089Spjd					continue;
722219089Spjd			}
723185029Spjd		} else {
724185029Spjd			char *source;
725185029Spjd			if (nvlist_lookup_string(propnv,
726185029Spjd			    ZPROP_SOURCE, &source) != 0)
727185029Spjd				continue;
728219089Spjd			if ((strcmp(source, zhp->zfs_name) != 0) &&
729219089Spjd			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
730185029Spjd				continue;
731185029Spjd		}
732185029Spjd
733185029Spjd		if (zfs_prop_user(propname) ||
734185029Spjd		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
735185029Spjd			char *value;
736185029Spjd			verify(nvlist_lookup_string(propnv,
737185029Spjd			    ZPROP_VALUE, &value) == 0);
738185029Spjd			VERIFY(0 == nvlist_add_string(nv, propname, value));
739185029Spjd		} else {
740185029Spjd			uint64_t value;
741185029Spjd			verify(nvlist_lookup_uint64(propnv,
742185029Spjd			    ZPROP_VALUE, &value) == 0);
743185029Spjd			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
744185029Spjd		}
745185029Spjd	}
746185029Spjd}
747185029Spjd
748219089Spjd/*
749307117Smav * returns snapshot creation txg
750307117Smav * and returns 0 if the snapshot does not exist
751307117Smav */
752307117Smavstatic uint64_t
753307117Smavget_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
754307117Smav{
755307122Smav	char name[ZFS_MAX_DATASET_NAME_LEN];
756307117Smav	uint64_t txg = 0;
757307117Smav
758307117Smav	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
759307117Smav		return (txg);
760307117Smav
761307117Smav	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
762307117Smav	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
763307117Smav		zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
764307117Smav		if (zhp != NULL) {
765307117Smav			txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
766307117Smav			zfs_close(zhp);
767307117Smav		}
768307117Smav	}
769307117Smav
770307117Smav	return (txg);
771307117Smav}
772307117Smav
773307117Smav/*
774219089Spjd * recursively generate nvlists describing datasets.  See comment
775219089Spjd * for the data structure send_data_t above for description of contents
776219089Spjd * of the nvlist.
777219089Spjd */
778185029Spjdstatic int
779185029Spjdsend_iterate_fs(zfs_handle_t *zhp, void *arg)
780185029Spjd{
781185029Spjd	send_data_t *sd = arg;
782185029Spjd	nvlist_t *nvfs, *nv;
783219089Spjd	int rv = 0;
784185029Spjd	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
785307117Smav	uint64_t fromsnap_txg_save = sd->fromsnap_txg;
786307117Smav	uint64_t tosnap_txg_save = sd->tosnap_txg;
787307117Smav	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
788185029Spjd	uint64_t guid = zhp->zfs_dmustats.dds_guid;
789307117Smav	uint64_t fromsnap_txg, tosnap_txg;
790185029Spjd	char guidstring[64];
791185029Spjd
792307117Smav	fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
793307117Smav	if (fromsnap_txg != 0)
794307117Smav		sd->fromsnap_txg = fromsnap_txg;
795307117Smav
796307117Smav	tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
797307117Smav	if (tosnap_txg != 0)
798307117Smav		sd->tosnap_txg = tosnap_txg;
799307117Smav
800307117Smav	/*
801307117Smav	 * on the send side, if the current dataset does not have tosnap,
802307117Smav	 * perform two additional checks:
803307117Smav	 *
804307117Smav	 * - skip sending the current dataset if it was created later than
805307117Smav	 *   the parent tosnap
806307117Smav	 * - return error if the current dataset was created earlier than
807307117Smav	 *   the parent tosnap
808307117Smav	 */
809307117Smav	if (sd->tosnap != NULL && tosnap_txg == 0) {
810307117Smav		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
811307117Smav			if (sd->verbose) {
812307117Smav				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
813307117Smav				    "skipping dataset %s: snapshot %s does "
814307117Smav				    "not exist\n"), zhp->zfs_name, sd->tosnap);
815307117Smav			}
816307117Smav		} else {
817307117Smav			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
818307117Smav			    "cannot send %s@%s%s: snapshot %s@%s does not "
819307117Smav			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
820307117Smav			    dgettext(TEXT_DOMAIN, " recursively") : "",
821307117Smav			    zhp->zfs_name, sd->tosnap);
822307117Smav			rv = -1;
823307117Smav		}
824307117Smav		goto out;
825307117Smav	}
826307117Smav
827185029Spjd	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
828185029Spjd	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
829185029Spjd	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
830185029Spjd	    sd->parent_fromsnap_guid));
831185029Spjd
832185029Spjd	if (zhp->zfs_dmustats.dds_origin[0]) {
833185029Spjd		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
834185029Spjd		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
835307117Smav		if (origin == NULL) {
836307117Smav			rv = -1;
837307117Smav			goto out;
838307117Smav		}
839185029Spjd		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
840185029Spjd		    origin->zfs_dmustats.dds_guid));
841185029Spjd	}
842185029Spjd
843185029Spjd	/* iterate over props */
844185029Spjd	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
845185029Spjd	send_iterate_prop(zhp, nv);
846185029Spjd	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
847185029Spjd	nvlist_free(nv);
848185029Spjd
849185029Spjd	/* iterate over snaps, and set sd->parent_fromsnap_guid */
850185029Spjd	sd->parent_fromsnap_guid = 0;
851185029Spjd	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
852185029Spjd	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
853244194Ssmh	(void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
854185029Spjd	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
855185029Spjd	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
856185029Spjd	nvlist_free(sd->parent_snaps);
857185029Spjd	nvlist_free(sd->snapprops);
858185029Spjd
859185029Spjd	/* add this fs to nvlist */
860185029Spjd	(void) snprintf(guidstring, sizeof (guidstring),
861185029Spjd	    "0x%llx", (longlong_t)guid);
862185029Spjd	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
863185029Spjd	nvlist_free(nvfs);
864185029Spjd
865185029Spjd	/* iterate over children */
866219089Spjd	if (sd->recursive)
867219089Spjd		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
868185029Spjd
869307117Smavout:
870185029Spjd	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
871307117Smav	sd->fromsnap_txg = fromsnap_txg_save;
872307117Smav	sd->tosnap_txg = tosnap_txg_save;
873185029Spjd
874185029Spjd	zfs_close(zhp);
875185029Spjd	return (rv);
876185029Spjd}
877185029Spjd
878185029Spjdstatic int
879185029Spjdgather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
880307117Smav    const char *tosnap, boolean_t recursive, boolean_t verbose,
881307117Smav    nvlist_t **nvlp, avl_tree_t **avlp)
882185029Spjd{
883185029Spjd	zfs_handle_t *zhp;
884185029Spjd	send_data_t sd = { 0 };
885185029Spjd	int error;
886185029Spjd
887185029Spjd	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
888185029Spjd	if (zhp == NULL)
889185029Spjd		return (EZFS_BADTYPE);
890185029Spjd
891185029Spjd	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
892307117Smav	sd.fsname = fsname;
893185029Spjd	sd.fromsnap = fromsnap;
894185029Spjd	sd.tosnap = tosnap;
895219089Spjd	sd.recursive = recursive;
896307117Smav	sd.verbose = verbose;
897185029Spjd
898185029Spjd	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
899185029Spjd		nvlist_free(sd.fss);
900185029Spjd		if (avlp != NULL)
901185029Spjd			*avlp = NULL;
902185029Spjd		*nvlp = NULL;
903185029Spjd		return (error);
904185029Spjd	}
905185029Spjd
906185029Spjd	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
907185029Spjd		nvlist_free(sd.fss);
908185029Spjd		*nvlp = NULL;
909185029Spjd		return (EZFS_NOMEM);
910185029Spjd	}
911185029Spjd
912185029Spjd	*nvlp = sd.fss;
913185029Spjd	return (0);
914185029Spjd}
915185029Spjd
916185029Spjd/*
917185029Spjd * Routines specific to "zfs send"
918185029Spjd */
919185029Spjdtypedef struct send_dump_data {
920185029Spjd	/* these are all just the short snapname (the part after the @) */
921185029Spjd	const char *fromsnap;
922185029Spjd	const char *tosnap;
923307122Smav	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
924219089Spjd	uint64_t prevsnap_obj;
925185029Spjd	boolean_t seenfrom, seento, replicate, doall, fromorigin;
926290755Smav	boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
927290755Smav	boolean_t large_block;
928185029Spjd	int outfd;
929185029Spjd	boolean_t err;
930185029Spjd	nvlist_t *fss;
931251646Sdelphij	nvlist_t *snapholds;
932185029Spjd	avl_tree_t *fsavl;
933219089Spjd	snapfilter_cb_t *filter_cb;
934219089Spjd	void *filter_cb_arg;
935219089Spjd	nvlist_t *debugnv;
936307122Smav	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
937219089Spjd	int cleanup_fd;
938228103Smm	uint64_t size;
939185029Spjd} send_dump_data_t;
940185029Spjd
941228103Smmstatic int
942228103Smmestimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
943228103Smm    boolean_t fromorigin, uint64_t *sizep)
944228103Smm{
945228103Smm	zfs_cmd_t zc = { 0 };
946228103Smm	libzfs_handle_t *hdl = zhp->zfs_hdl;
947228103Smm
948228103Smm	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
949228103Smm	assert(fromsnap_obj == 0 || !fromorigin);
950228103Smm
951228103Smm	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
952228103Smm	zc.zc_obj = fromorigin;
953228103Smm	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
954228103Smm	zc.zc_fromobj = fromsnap_obj;
955228103Smm	zc.zc_guid = 1;  /* estimate flag */
956228103Smm
957228103Smm	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
958228103Smm		char errbuf[1024];
959228103Smm		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
960228103Smm		    "warning: cannot estimate space for '%s'"), zhp->zfs_name);
961228103Smm
962228103Smm		switch (errno) {
963228103Smm		case EXDEV:
964228103Smm			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
965228103Smm			    "not an earlier snapshot from the same fs"));
966228103Smm			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
967228103Smm
968228103Smm		case ENOENT:
969228103Smm			if (zfs_dataset_exists(hdl, zc.zc_name,
970228103Smm			    ZFS_TYPE_SNAPSHOT)) {
971228103Smm				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
972228103Smm				    "incremental source (@%s) does not exist"),
973228103Smm				    zc.zc_value);
974228103Smm			}
975228103Smm			return (zfs_error(hdl, EZFS_NOENT, errbuf));
976228103Smm
977228103Smm		case EDQUOT:
978228103Smm		case EFBIG:
979228103Smm		case EIO:
980228103Smm		case ENOLINK:
981228103Smm		case ENOSPC:
982228103Smm		case ENXIO:
983228103Smm		case EPIPE:
984228103Smm		case ERANGE:
985228103Smm		case EFAULT:
986228103Smm		case EROFS:
987228103Smm			zfs_error_aux(hdl, strerror(errno));
988228103Smm			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
989228103Smm
990228103Smm		default:
991228103Smm			return (zfs_standard_error(hdl, errno, errbuf));
992228103Smm		}
993228103Smm	}
994228103Smm
995228103Smm	*sizep = zc.zc_objset_type;
996228103Smm
997228103Smm	return (0);
998228103Smm}
999228103Smm
1000185029Spjd/*
1001185029Spjd * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
1002185029Spjd * NULL) to the file descriptor specified by outfd.
1003185029Spjd */
1004185029Spjdstatic int
1005219089Spjddump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
1006268649Sdelphij    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
1007268649Sdelphij    nvlist_t *debugnv)
1008185029Spjd{
1009185029Spjd	zfs_cmd_t zc = { 0 };
1010185029Spjd	libzfs_handle_t *hdl = zhp->zfs_hdl;
1011219089Spjd	nvlist_t *thisdbg;
1012185029Spjd
1013185029Spjd	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1014219089Spjd	assert(fromsnap_obj == 0 || !fromorigin);
1015185029Spjd
1016185029Spjd	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1017185029Spjd	zc.zc_cookie = outfd;
1018185029Spjd	zc.zc_obj = fromorigin;
1019219089Spjd	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1020219089Spjd	zc.zc_fromobj = fromsnap_obj;
1021268649Sdelphij	zc.zc_flags = flags;
1022185029Spjd
1023219089Spjd	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
1024219089Spjd	if (fromsnap && fromsnap[0] != '\0') {
1025219089Spjd		VERIFY(0 == nvlist_add_string(thisdbg,
1026219089Spjd		    "fromsnap", fromsnap));
1027219089Spjd	}
1028219089Spjd
1029228103Smm	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1030185029Spjd		char errbuf[1024];
1031185029Spjd		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1032185029Spjd		    "warning: cannot send '%s'"), zhp->zfs_name);
1033185029Spjd
1034219089Spjd		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
1035219089Spjd		if (debugnv) {
1036219089Spjd			VERIFY(0 == nvlist_add_nvlist(debugnv,
1037219089Spjd			    zhp->zfs_name, thisdbg));
1038219089Spjd		}
1039219089Spjd		nvlist_free(thisdbg);
1040219089Spjd
1041185029Spjd		switch (errno) {
1042185029Spjd		case EXDEV:
1043185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1044185029Spjd			    "not an earlier snapshot from the same fs"));
1045185029Spjd			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1046185029Spjd
1047185029Spjd		case ENOENT:
1048185029Spjd			if (zfs_dataset_exists(hdl, zc.zc_name,
1049185029Spjd			    ZFS_TYPE_SNAPSHOT)) {
1050185029Spjd				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1051185029Spjd				    "incremental source (@%s) does not exist"),
1052185029Spjd				    zc.zc_value);
1053185029Spjd			}
1054185029Spjd			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1055185029Spjd
1056185029Spjd		case EDQUOT:
1057185029Spjd		case EFBIG:
1058185029Spjd		case EIO:
1059185029Spjd		case ENOLINK:
1060185029Spjd		case ENOSPC:
1061297077Smav#ifdef illumos
1062228103Smm		case ENOSTR:
1063228103Smm#endif
1064185029Spjd		case ENXIO:
1065185029Spjd		case EPIPE:
1066185029Spjd		case ERANGE:
1067185029Spjd		case EFAULT:
1068185029Spjd		case EROFS:
1069185029Spjd			zfs_error_aux(hdl, strerror(errno));
1070185029Spjd			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1071185029Spjd
1072185029Spjd		default:
1073185029Spjd			return (zfs_standard_error(hdl, errno, errbuf));
1074185029Spjd		}
1075185029Spjd	}
1076185029Spjd
1077219089Spjd	if (debugnv)
1078219089Spjd		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
1079219089Spjd	nvlist_free(thisdbg);
1080219089Spjd
1081185029Spjd	return (0);
1082185029Spjd}
1083185029Spjd
1084251646Sdelphijstatic void
1085251646Sdelphijgather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
1086219089Spjd{
1087219089Spjd	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1088219089Spjd
1089219089Spjd	/*
1090251646Sdelphij	 * zfs_send() only sets snapholds for sends that need them,
1091219089Spjd	 * e.g. replication and doall.
1092219089Spjd	 */
1093251646Sdelphij	if (sdd->snapholds == NULL)
1094251646Sdelphij		return;
1095219089Spjd
1096251646Sdelphij	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1097219089Spjd}
1098219089Spjd
1099235222Smmstatic void *
1100235222Smmsend_progress_thread(void *arg)
1101235222Smm{
1102235222Smm	progress_arg_t *pa = arg;
1103235222Smm	zfs_cmd_t zc = { 0 };
1104235222Smm	zfs_handle_t *zhp = pa->pa_zhp;
1105235222Smm	libzfs_handle_t *hdl = zhp->zfs_hdl;
1106235222Smm	unsigned long long bytes;
1107235222Smm	char buf[16];
1108235222Smm	time_t t;
1109235222Smm	struct tm *tm;
1110235222Smm
1111235222Smm	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1112235222Smm
1113235222Smm	if (!pa->pa_parsable)
1114235222Smm		(void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1115235222Smm
1116235222Smm	/*
1117235222Smm	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1118235222Smm	 */
1119235222Smm	for (;;) {
1120235222Smm		(void) sleep(1);
1121235222Smm
1122235222Smm		zc.zc_cookie = pa->pa_fd;
1123235222Smm		if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1124235222Smm			return ((void *)-1);
1125235222Smm
1126235222Smm		(void) time(&t);
1127235222Smm		tm = localtime(&t);
1128235222Smm		bytes = zc.zc_cookie;
1129235222Smm
1130235222Smm		if (pa->pa_parsable) {
1131235222Smm			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1132235222Smm			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1133235222Smm			    bytes, zhp->zfs_name);
1134235222Smm		} else {
1135235222Smm			zfs_nicenum(bytes, buf, sizeof (buf));
1136235222Smm			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1137235222Smm			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1138235222Smm			    buf, zhp->zfs_name);
1139235222Smm		}
1140235222Smm	}
1141235222Smm}
1142235222Smm
1143290756Smavstatic void
1144290756Smavsend_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1145290756Smav    uint64_t size, boolean_t parsable)
1146290756Smav{
1147290756Smav	if (parsable) {
1148290756Smav		if (fromsnap != NULL) {
1149290756Smav			(void) fprintf(fout, "incremental\t%s\t%s",
1150290756Smav			    fromsnap, tosnap);
1151290756Smav		} else {
1152290756Smav			(void) fprintf(fout, "full\t%s",
1153290756Smav			    tosnap);
1154290756Smav		}
1155290756Smav	} else {
1156290756Smav		if (fromsnap != NULL) {
1157290756Smav			if (strchr(fromsnap, '@') == NULL &&
1158290756Smav			    strchr(fromsnap, '#') == NULL) {
1159290756Smav				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1160290756Smav				    "send from @%s to %s"),
1161290756Smav				    fromsnap, tosnap);
1162290756Smav			} else {
1163290756Smav				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1164290756Smav				    "send from %s to %s"),
1165290756Smav				    fromsnap, tosnap);
1166290756Smav			}
1167290756Smav		} else {
1168290756Smav			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1169290756Smav			    "full send of %s"),
1170290756Smav			    tosnap);
1171290756Smav		}
1172290756Smav	}
1173290756Smav
1174290756Smav	if (size != 0) {
1175290756Smav		if (parsable) {
1176290756Smav			(void) fprintf(fout, "\t%llu",
1177290756Smav			    (longlong_t)size);
1178290756Smav		} else {
1179290756Smav			char buf[16];
1180290756Smav			zfs_nicenum(size, buf, sizeof (buf));
1181290756Smav			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1182290756Smav			    " estimated size is %s"), buf);
1183290756Smav		}
1184290756Smav	}
1185290756Smav	(void) fprintf(fout, "\n");
1186290756Smav}
1187290756Smav
1188219089Spjdstatic int
1189185029Spjddump_snapshot(zfs_handle_t *zhp, void *arg)
1190185029Spjd{
1191185029Spjd	send_dump_data_t *sdd = arg;
1192235222Smm	progress_arg_t pa = { 0 };
1193235222Smm	pthread_t tid;
1194219089Spjd	char *thissnap;
1195185029Spjd	int err;
1196228103Smm	boolean_t isfromsnap, istosnap, fromorigin;
1197219089Spjd	boolean_t exclude = B_FALSE;
1198290755Smav	FILE *fout = sdd->std_out ? stdout : stderr;
1199185029Spjd
1200251646Sdelphij	err = 0;
1201185029Spjd	thissnap = strchr(zhp->zfs_name, '@') + 1;
1202219089Spjd	isfromsnap = (sdd->fromsnap != NULL &&
1203219089Spjd	    strcmp(sdd->fromsnap, thissnap) == 0);
1204185029Spjd
1205219089Spjd	if (!sdd->seenfrom && isfromsnap) {
1206251646Sdelphij		gather_holds(zhp, sdd);
1207251646Sdelphij		sdd->seenfrom = B_TRUE;
1208251646Sdelphij		(void) strcpy(sdd->prevsnap, thissnap);
1209251646Sdelphij		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1210185029Spjd		zfs_close(zhp);
1211251646Sdelphij		return (0);
1212185029Spjd	}
1213185029Spjd
1214185029Spjd	if (sdd->seento || !sdd->seenfrom) {
1215185029Spjd		zfs_close(zhp);
1216185029Spjd		return (0);
1217185029Spjd	}
1218185029Spjd
1219219089Spjd	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1220219089Spjd	if (istosnap)
1221219089Spjd		sdd->seento = B_TRUE;
1222219089Spjd
1223219089Spjd	if (!sdd->doall && !isfromsnap && !istosnap) {
1224219089Spjd		if (sdd->replicate) {
1225219089Spjd			char *snapname;
1226219089Spjd			nvlist_t *snapprops;
1227219089Spjd			/*
1228219089Spjd			 * Filter out all intermediate snapshots except origin
1229219089Spjd			 * snapshots needed to replicate clones.
1230219089Spjd			 */
1231219089Spjd			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1232219089Spjd			    zhp->zfs_dmustats.dds_guid, &snapname);
1233219089Spjd
1234219089Spjd			VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1235219089Spjd			    "snapprops", &snapprops));
1236219089Spjd			VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1237219089Spjd			    thissnap, &snapprops));
1238219089Spjd			exclude = !nvlist_exists(snapprops, "is_clone_origin");
1239219089Spjd		} else {
1240219089Spjd			exclude = B_TRUE;
1241219089Spjd		}
1242219089Spjd	}
1243219089Spjd
1244219089Spjd	/*
1245219089Spjd	 * If a filter function exists, call it to determine whether
1246219089Spjd	 * this snapshot will be sent.
1247219089Spjd	 */
1248219089Spjd	if (exclude || (sdd->filter_cb != NULL &&
1249219089Spjd	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1250219089Spjd		/*
1251219089Spjd		 * This snapshot is filtered out.  Don't send it, and don't
1252219089Spjd		 * set prevsnap_obj, so it will be as if this snapshot didn't
1253219089Spjd		 * exist, and the next accepted snapshot will be sent as
1254219089Spjd		 * an incremental from the last accepted one, or as the
1255219089Spjd		 * first (and full) snapshot in the case of a replication,
1256219089Spjd		 * non-incremental send.
1257219089Spjd		 */
1258219089Spjd		zfs_close(zhp);
1259219089Spjd		return (0);
1260219089Spjd	}
1261219089Spjd
1262251646Sdelphij	gather_holds(zhp, sdd);
1263228103Smm	fromorigin = sdd->prevsnap[0] == '\0' &&
1264228103Smm	    (sdd->fromorigin || sdd->replicate);
1265228103Smm
1266185029Spjd	if (sdd->verbose) {
1267290756Smav		uint64_t size = 0;
1268290756Smav		(void) estimate_ioctl(zhp, sdd->prevsnap_obj,
1269228103Smm		    fromorigin, &size);
1270228103Smm
1271290756Smav		send_print_verbose(fout, zhp->zfs_name,
1272290756Smav		    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1273290756Smav		    size, sdd->parsable);
1274290756Smav		sdd->size += size;
1275185029Spjd	}
1276185029Spjd
1277228103Smm	if (!sdd->dryrun) {
1278235222Smm		/*
1279235222Smm		 * If progress reporting is requested, spawn a new thread to
1280235222Smm		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1281235222Smm		 */
1282235222Smm		if (sdd->progress) {
1283235222Smm			pa.pa_zhp = zhp;
1284235222Smm			pa.pa_fd = sdd->outfd;
1285235222Smm			pa.pa_parsable = sdd->parsable;
1286235222Smm
1287307058Smav			if ((err = pthread_create(&tid, NULL,
1288307058Smav			    send_progress_thread, &pa)) != 0) {
1289235222Smm				zfs_close(zhp);
1290235222Smm				return (err);
1291235222Smm			}
1292235222Smm		}
1293235222Smm
1294268649Sdelphij		enum lzc_send_flags flags = 0;
1295276081Sdelphij		if (sdd->large_block)
1296276081Sdelphij			flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1297268649Sdelphij		if (sdd->embed_data)
1298268649Sdelphij			flags |= LZC_SEND_FLAG_EMBED_DATA;
1299268649Sdelphij
1300228103Smm		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1301268649Sdelphij		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1302235222Smm
1303235222Smm		if (sdd->progress) {
1304235222Smm			(void) pthread_cancel(tid);
1305235222Smm			(void) pthread_join(tid, NULL);
1306235222Smm		}
1307228103Smm	}
1308185029Spjd
1309219089Spjd	(void) strcpy(sdd->prevsnap, thissnap);
1310219089Spjd	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1311185029Spjd	zfs_close(zhp);
1312185029Spjd	return (err);
1313185029Spjd}
1314185029Spjd
1315185029Spjdstatic int
1316185029Spjddump_filesystem(zfs_handle_t *zhp, void *arg)
1317185029Spjd{
1318185029Spjd	int rv = 0;
1319185029Spjd	send_dump_data_t *sdd = arg;
1320185029Spjd	boolean_t missingfrom = B_FALSE;
1321185029Spjd	zfs_cmd_t zc = { 0 };
1322185029Spjd
1323185029Spjd	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1324185029Spjd	    zhp->zfs_name, sdd->tosnap);
1325185029Spjd	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1326228103Smm		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1327228103Smm		    "WARNING: could not send %s@%s: does not exist\n"),
1328185029Spjd		    zhp->zfs_name, sdd->tosnap);
1329185029Spjd		sdd->err = B_TRUE;
1330185029Spjd		return (0);
1331185029Spjd	}
1332185029Spjd
1333185029Spjd	if (sdd->replicate && sdd->fromsnap) {
1334185029Spjd		/*
1335185029Spjd		 * If this fs does not have fromsnap, and we're doing
1336185029Spjd		 * recursive, we need to send a full stream from the
1337185029Spjd		 * beginning (or an incremental from the origin if this
1338185029Spjd		 * is a clone).  If we're doing non-recursive, then let
1339185029Spjd		 * them get the error.
1340185029Spjd		 */
1341185029Spjd		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1342185029Spjd		    zhp->zfs_name, sdd->fromsnap);
1343185029Spjd		if (ioctl(zhp->zfs_hdl->libzfs_fd,
1344185029Spjd		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1345185029Spjd			missingfrom = B_TRUE;
1346185029Spjd		}
1347185029Spjd	}
1348185029Spjd
1349219089Spjd	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1350219089Spjd	sdd->prevsnap_obj = 0;
1351219089Spjd	if (sdd->fromsnap == NULL || missingfrom)
1352219089Spjd		sdd->seenfrom = B_TRUE;
1353185029Spjd
1354219089Spjd	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1355219089Spjd	if (!sdd->seenfrom) {
1356228103Smm		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1357219089Spjd		    "WARNING: could not send %s@%s:\n"
1358228103Smm		    "incremental source (%s@%s) does not exist\n"),
1359219089Spjd		    zhp->zfs_name, sdd->tosnap,
1360219089Spjd		    zhp->zfs_name, sdd->fromsnap);
1361219089Spjd		sdd->err = B_TRUE;
1362219089Spjd	} else if (!sdd->seento) {
1363219089Spjd		if (sdd->fromsnap) {
1364228103Smm			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1365185029Spjd			    "WARNING: could not send %s@%s:\n"
1366219089Spjd			    "incremental source (%s@%s) "
1367228103Smm			    "is not earlier than it\n"),
1368185029Spjd			    zhp->zfs_name, sdd->tosnap,
1369185029Spjd			    zhp->zfs_name, sdd->fromsnap);
1370185029Spjd		} else {
1371228103Smm			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1372228103Smm			    "WARNING: "
1373228103Smm			    "could not send %s@%s: does not exist\n"),
1374219089Spjd			    zhp->zfs_name, sdd->tosnap);
1375185029Spjd		}
1376219089Spjd		sdd->err = B_TRUE;
1377185029Spjd	}
1378185029Spjd
1379185029Spjd	return (rv);
1380185029Spjd}
1381185029Spjd
1382185029Spjdstatic int
1383185029Spjddump_filesystems(zfs_handle_t *rzhp, void *arg)
1384185029Spjd{
1385185029Spjd	send_dump_data_t *sdd = arg;
1386185029Spjd	nvpair_t *fspair;
1387185029Spjd	boolean_t needagain, progress;
1388185029Spjd
1389185029Spjd	if (!sdd->replicate)
1390185029Spjd		return (dump_filesystem(rzhp, sdd));
1391185029Spjd
1392219089Spjd	/* Mark the clone origin snapshots. */
1393219089Spjd	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1394219089Spjd	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1395219089Spjd		nvlist_t *nvfs;
1396219089Spjd		uint64_t origin_guid = 0;
1397219089Spjd
1398219089Spjd		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1399219089Spjd		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1400219089Spjd		if (origin_guid != 0) {
1401219089Spjd			char *snapname;
1402219089Spjd			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1403219089Spjd			    origin_guid, &snapname);
1404219089Spjd			if (origin_nv != NULL) {
1405219089Spjd				nvlist_t *snapprops;
1406219089Spjd				VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1407219089Spjd				    "snapprops", &snapprops));
1408219089Spjd				VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1409219089Spjd				    snapname, &snapprops));
1410219089Spjd				VERIFY(0 == nvlist_add_boolean(
1411219089Spjd				    snapprops, "is_clone_origin"));
1412219089Spjd			}
1413219089Spjd		}
1414219089Spjd	}
1415185029Spjdagain:
1416185029Spjd	needagain = progress = B_FALSE;
1417185029Spjd	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1418185029Spjd	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1419228103Smm		nvlist_t *fslist, *parent_nv;
1420185029Spjd		char *fsname;
1421185029Spjd		zfs_handle_t *zhp;
1422185029Spjd		int err;
1423185029Spjd		uint64_t origin_guid = 0;
1424228103Smm		uint64_t parent_guid = 0;
1425185029Spjd
1426185029Spjd		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1427185029Spjd		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1428185029Spjd			continue;
1429185029Spjd
1430185029Spjd		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1431185029Spjd		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1432228103Smm		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1433228103Smm		    &parent_guid);
1434185029Spjd
1435228103Smm		if (parent_guid != 0) {
1436228103Smm			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1437228103Smm			if (!nvlist_exists(parent_nv, "sent")) {
1438228103Smm				/* parent has not been sent; skip this one */
1439228103Smm				needagain = B_TRUE;
1440228103Smm				continue;
1441228103Smm			}
1442228103Smm		}
1443228103Smm
1444219089Spjd		if (origin_guid != 0) {
1445219089Spjd			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1446219089Spjd			    origin_guid, NULL);
1447219089Spjd			if (origin_nv != NULL &&
1448228103Smm			    !nvlist_exists(origin_nv, "sent")) {
1449219089Spjd				/*
1450219089Spjd				 * origin has not been sent yet;
1451219089Spjd				 * skip this clone.
1452219089Spjd				 */
1453219089Spjd				needagain = B_TRUE;
1454219089Spjd				continue;
1455219089Spjd			}
1456185029Spjd		}
1457185029Spjd
1458185029Spjd		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1459185029Spjd		if (zhp == NULL)
1460185029Spjd			return (-1);
1461185029Spjd		err = dump_filesystem(zhp, sdd);
1462185029Spjd		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1463185029Spjd		progress = B_TRUE;
1464185029Spjd		zfs_close(zhp);
1465185029Spjd		if (err)
1466185029Spjd			return (err);
1467185029Spjd	}
1468185029Spjd	if (needagain) {
1469185029Spjd		assert(progress);
1470185029Spjd		goto again;
1471185029Spjd	}
1472228103Smm
1473228103Smm	/* clean out the sent flags in case we reuse this fss */
1474228103Smm	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1475228103Smm	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1476228103Smm		nvlist_t *fslist;
1477228103Smm
1478228103Smm		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1479228103Smm		(void) nvlist_remove_all(fslist, "sent");
1480228103Smm	}
1481228103Smm
1482185029Spjd	return (0);
1483185029Spjd}
1484185029Spjd
1485290756Smavnvlist_t *
1486290756Smavzfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1487290756Smav{
1488290756Smav	unsigned int version;
1489290756Smav	int nread;
1490290756Smav	unsigned long long checksum, packed_len;
1491290756Smav
1492290756Smav	/*
1493290756Smav	 * Decode token header, which is:
1494290756Smav	 *   <token version>-<checksum of payload>-<uncompressed payload length>
1495290756Smav	 * Note that the only supported token version is 1.
1496290756Smav	 */
1497290756Smav	nread = sscanf(token, "%u-%llx-%llx-",
1498290756Smav	    &version, &checksum, &packed_len);
1499290756Smav	if (nread != 3) {
1500290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1501290756Smav		    "resume token is corrupt (invalid format)"));
1502290756Smav		return (NULL);
1503290756Smav	}
1504290756Smav
1505290756Smav	if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1506290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1507290756Smav		    "resume token is corrupt (invalid version %u)"),
1508290756Smav		    version);
1509290756Smav		return (NULL);
1510290756Smav	}
1511290756Smav
1512290756Smav	/* convert hexadecimal representation to binary */
1513290756Smav	token = strrchr(token, '-') + 1;
1514290756Smav	int len = strlen(token) / 2;
1515290756Smav	unsigned char *compressed = zfs_alloc(hdl, len);
1516290756Smav	for (int i = 0; i < len; i++) {
1517290756Smav		nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1518290756Smav		if (nread != 1) {
1519290756Smav			free(compressed);
1520290756Smav			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1521290756Smav			    "resume token is corrupt "
1522290756Smav			    "(payload is not hex-encoded)"));
1523290756Smav			return (NULL);
1524290756Smav		}
1525290756Smav	}
1526290756Smav
1527290756Smav	/* verify checksum */
1528290756Smav	zio_cksum_t cksum;
1529290757Smav	fletcher_4_native(compressed, len, NULL, &cksum);
1530290756Smav	if (cksum.zc_word[0] != checksum) {
1531290756Smav		free(compressed);
1532290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1533290756Smav		    "resume token is corrupt (incorrect checksum)"));
1534290756Smav		return (NULL);
1535290756Smav	}
1536290756Smav
1537290756Smav	/* uncompress */
1538290756Smav	void *packed = zfs_alloc(hdl, packed_len);
1539290756Smav	uLongf packed_len_long = packed_len;
1540290756Smav	if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1541290756Smav	    packed_len_long != packed_len) {
1542290756Smav		free(packed);
1543290756Smav		free(compressed);
1544290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1545290756Smav		    "resume token is corrupt (decompression failed)"));
1546290756Smav		return (NULL);
1547290756Smav	}
1548290756Smav
1549290756Smav	/* unpack nvlist */
1550290756Smav	nvlist_t *nv;
1551290756Smav	int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1552290756Smav	free(packed);
1553290756Smav	free(compressed);
1554290756Smav	if (error != 0) {
1555290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1556290756Smav		    "resume token is corrupt (nvlist_unpack failed)"));
1557290756Smav		return (NULL);
1558290756Smav	}
1559290756Smav	return (nv);
1560290756Smav}
1561290756Smav
1562290756Smavint
1563290756Smavzfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1564290756Smav    const char *resume_token)
1565290756Smav{
1566290756Smav	char errbuf[1024];
1567290756Smav	char *toname;
1568290756Smav	char *fromname = NULL;
1569290756Smav	uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1570290756Smav	zfs_handle_t *zhp;
1571290756Smav	int error = 0;
1572307122Smav	char name[ZFS_MAX_DATASET_NAME_LEN];
1573290756Smav	enum lzc_send_flags lzc_flags = 0;
1574290756Smav
1575290756Smav	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1576290756Smav	    "cannot resume send"));
1577290756Smav
1578290756Smav	nvlist_t *resume_nvl =
1579290756Smav	    zfs_send_resume_token_to_nvlist(hdl, resume_token);
1580290756Smav	if (resume_nvl == NULL) {
1581290756Smav		/*
1582290756Smav		 * zfs_error_aux has already been set by
1583290756Smav		 * zfs_send_resume_token_to_nvlist
1584290756Smav		 */
1585290756Smav		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1586290756Smav	}
1587290756Smav	if (flags->verbose) {
1588290756Smav		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1589290756Smav		    "resume token contents:\n"));
1590290756Smav		nvlist_print(stderr, resume_nvl);
1591290756Smav	}
1592290756Smav
1593290756Smav	if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1594290756Smav	    nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1595290756Smav	    nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1596290756Smav	    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1597290756Smav	    nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1598290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1599290756Smav		    "resume token is corrupt"));
1600290756Smav		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1601290756Smav	}
1602290756Smav	fromguid = 0;
1603290756Smav	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1604290756Smav
1605290756Smav	if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1606290756Smav		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1607290756Smav
1608290756Smav	if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
1609290756Smav		if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1610290756Smav			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1611290756Smav			    "'%s' is no longer the same snapshot used in "
1612290756Smav			    "the initial send"), toname);
1613290756Smav		} else {
1614290756Smav			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1615290756Smav			    "'%s' used in the initial send no longer exists"),
1616290756Smav			    toname);
1617290756Smav		}
1618290756Smav		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1619290756Smav	}
1620290756Smav	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1621290756Smav	if (zhp == NULL) {
1622290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1623290756Smav		    "unable to access '%s'"), name);
1624290756Smav		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1625290756Smav	}
1626290756Smav
1627290756Smav	if (fromguid != 0) {
1628290756Smav		if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
1629290756Smav			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1630290756Smav			    "incremental source %#llx no longer exists"),
1631290756Smav			    (longlong_t)fromguid);
1632290756Smav			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1633290756Smav		}
1634290756Smav		fromname = name;
1635290756Smav	}
1636290756Smav
1637290756Smav	if (flags->verbose) {
1638290756Smav		uint64_t size = 0;
1639290756Smav		error = lzc_send_space(zhp->zfs_name, fromname, &size);
1640290756Smav		if (error == 0)
1641290756Smav			size = MAX(0, (int64_t)(size - bytes));
1642290756Smav		send_print_verbose(stderr, zhp->zfs_name, fromname,
1643290756Smav		    size, flags->parsable);
1644290756Smav	}
1645290756Smav
1646290756Smav	if (!flags->dryrun) {
1647290756Smav		progress_arg_t pa = { 0 };
1648290756Smav		pthread_t tid;
1649290756Smav		/*
1650290756Smav		 * If progress reporting is requested, spawn a new thread to
1651290756Smav		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1652290756Smav		 */
1653290756Smav		if (flags->progress) {
1654290756Smav			pa.pa_zhp = zhp;
1655290756Smav			pa.pa_fd = outfd;
1656290756Smav			pa.pa_parsable = flags->parsable;
1657290756Smav
1658290756Smav			error = pthread_create(&tid, NULL,
1659290756Smav			    send_progress_thread, &pa);
1660290756Smav			if (error != 0) {
1661290756Smav				zfs_close(zhp);
1662290756Smav				return (error);
1663290756Smav			}
1664290756Smav		}
1665290756Smav
1666290756Smav		error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
1667290756Smav		    lzc_flags, resumeobj, resumeoff);
1668290756Smav
1669290756Smav		if (flags->progress) {
1670290756Smav			(void) pthread_cancel(tid);
1671290756Smav			(void) pthread_join(tid, NULL);
1672290756Smav		}
1673290756Smav
1674290756Smav		char errbuf[1024];
1675290756Smav		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1676290756Smav		    "warning: cannot send '%s'"), zhp->zfs_name);
1677290756Smav
1678290756Smav		zfs_close(zhp);
1679290756Smav
1680290756Smav		switch (error) {
1681290756Smav		case 0:
1682290756Smav			return (0);
1683290756Smav		case EXDEV:
1684290756Smav		case ENOENT:
1685290756Smav		case EDQUOT:
1686290756Smav		case EFBIG:
1687290756Smav		case EIO:
1688290756Smav		case ENOLINK:
1689290756Smav		case ENOSPC:
1690290756Smav#ifdef illumos
1691290756Smav		case ENOSTR:
1692290756Smav#endif
1693290756Smav		case ENXIO:
1694290756Smav		case EPIPE:
1695290756Smav		case ERANGE:
1696290756Smav		case EFAULT:
1697290756Smav		case EROFS:
1698290756Smav			zfs_error_aux(hdl, strerror(errno));
1699290756Smav			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1700290756Smav
1701290756Smav		default:
1702290756Smav			return (zfs_standard_error(hdl, errno, errbuf));
1703290756Smav		}
1704290756Smav	}
1705290756Smav
1706290756Smav
1707290756Smav	zfs_close(zhp);
1708290756Smav
1709290756Smav	return (error);
1710290756Smav}
1711290756Smav
1712185029Spjd/*
1713219089Spjd * Generate a send stream for the dataset identified by the argument zhp.
1714219089Spjd *
1715219089Spjd * The content of the send stream is the snapshot identified by
1716219089Spjd * 'tosnap'.  Incremental streams are requested in two ways:
1717219089Spjd *     - from the snapshot identified by "fromsnap" (if non-null) or
1718219089Spjd *     - from the origin of the dataset identified by zhp, which must
1719219089Spjd *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
1720219089Spjd *	 is TRUE.
1721219089Spjd *
1722219089Spjd * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1723219089Spjd * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1724219089Spjd * if "replicate" is set.  If "doall" is set, dump all the intermediate
1725219089Spjd * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1726219089Spjd * case too. If "props" is set, send properties.
1727185029Spjd */
1728185029Spjdint
1729185029Spjdzfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1730228103Smm    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1731219089Spjd    void *cb_arg, nvlist_t **debugnvp)
1732185029Spjd{
1733185029Spjd	char errbuf[1024];
1734185029Spjd	send_dump_data_t sdd = { 0 };
1735228103Smm	int err = 0;
1736185029Spjd	nvlist_t *fss = NULL;
1737185029Spjd	avl_tree_t *fsavl = NULL;
1738219089Spjd	static uint64_t holdseq;
1739219089Spjd	int spa_version;
1740251646Sdelphij	pthread_t tid = 0;
1741219089Spjd	int pipefd[2];
1742219089Spjd	dedup_arg_t dda = { 0 };
1743219089Spjd	int featureflags = 0;
1744290755Smav	FILE *fout;
1745185029Spjd
1746185029Spjd	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1747185029Spjd	    "cannot send '%s'"), zhp->zfs_name);
1748185029Spjd
1749185029Spjd	if (fromsnap && fromsnap[0] == '\0') {
1750185029Spjd		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1751185029Spjd		    "zero-length incremental source"));
1752185029Spjd		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1753185029Spjd	}
1754185029Spjd
1755219089Spjd	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1756219089Spjd		uint64_t version;
1757219089Spjd		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1758219089Spjd		if (version >= ZPL_VERSION_SA) {
1759219089Spjd			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1760219089Spjd		}
1761219089Spjd	}
1762219089Spjd
1763228103Smm	if (flags->dedup && !flags->dryrun) {
1764219089Spjd		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1765219089Spjd		    DMU_BACKUP_FEATURE_DEDUPPROPS);
1766307058Smav		if ((err = pipe(pipefd)) != 0) {
1767219089Spjd			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1768219089Spjd			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1769219089Spjd			    errbuf));
1770219089Spjd		}
1771219089Spjd		dda.outputfd = outfd;
1772219089Spjd		dda.inputfd = pipefd[1];
1773219089Spjd		dda.dedup_hdl = zhp->zfs_hdl;
1774307058Smav		if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
1775219089Spjd			(void) close(pipefd[0]);
1776219089Spjd			(void) close(pipefd[1]);
1777219089Spjd			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1778219089Spjd			return (zfs_error(zhp->zfs_hdl,
1779219089Spjd			    EZFS_THREADCREATEFAILED, errbuf));
1780219089Spjd		}
1781219089Spjd	}
1782219089Spjd
1783228103Smm	if (flags->replicate || flags->doall || flags->props) {
1784185029Spjd		dmu_replay_record_t drr = { 0 };
1785185029Spjd		char *packbuf = NULL;
1786185029Spjd		size_t buflen = 0;
1787185029Spjd		zio_cksum_t zc = { 0 };
1788185029Spjd
1789228103Smm		if (flags->replicate || flags->props) {
1790185029Spjd			nvlist_t *hdrnv;
1791185029Spjd
1792185029Spjd			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1793185029Spjd			if (fromsnap) {
1794185029Spjd				VERIFY(0 == nvlist_add_string(hdrnv,
1795185029Spjd				    "fromsnap", fromsnap));
1796185029Spjd			}
1797185029Spjd			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1798228103Smm			if (!flags->replicate) {
1799219089Spjd				VERIFY(0 == nvlist_add_boolean(hdrnv,
1800219089Spjd				    "not_recursive"));
1801219089Spjd			}
1802185029Spjd
1803185029Spjd			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1804307117Smav			    fromsnap, tosnap, flags->replicate, flags->verbose,
1805307117Smav			    &fss, &fsavl);
1806185029Spjd			if (err)
1807219089Spjd				goto err_out;
1808185029Spjd			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1809185029Spjd			err = nvlist_pack(hdrnv, &packbuf, &buflen,
1810185029Spjd			    NV_ENCODE_XDR, 0);
1811219089Spjd			if (debugnvp)
1812219089Spjd				*debugnvp = hdrnv;
1813219089Spjd			else
1814219089Spjd				nvlist_free(hdrnv);
1815251646Sdelphij			if (err)
1816219089Spjd				goto stderr_out;
1817185029Spjd		}
1818185029Spjd
1819228103Smm		if (!flags->dryrun) {
1820228103Smm			/* write first begin record */
1821228103Smm			drr.drr_type = DRR_BEGIN;
1822228103Smm			drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1823228103Smm			DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1824228103Smm			    drr_versioninfo, DMU_COMPOUNDSTREAM);
1825228103Smm			DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1826228103Smm			    drr_versioninfo, featureflags);
1827228103Smm			(void) snprintf(drr.drr_u.drr_begin.drr_toname,
1828228103Smm			    sizeof (drr.drr_u.drr_begin.drr_toname),
1829228103Smm			    "%s@%s", zhp->zfs_name, tosnap);
1830228103Smm			drr.drr_payloadlen = buflen;
1831185029Spjd
1832288553Smav			err = dump_record(&drr, packbuf, buflen, &zc, outfd);
1833228103Smm			free(packbuf);
1834288553Smav			if (err != 0)
1835228103Smm				goto stderr_out;
1836185029Spjd
1837228103Smm			/* write end record */
1838185029Spjd			bzero(&drr, sizeof (drr));
1839185029Spjd			drr.drr_type = DRR_END;
1840185029Spjd			drr.drr_u.drr_end.drr_checksum = zc;
1841185029Spjd			err = write(outfd, &drr, sizeof (drr));
1842185029Spjd			if (err == -1) {
1843219089Spjd				err = errno;
1844219089Spjd				goto stderr_out;
1845185029Spjd			}
1846228103Smm
1847228103Smm			err = 0;
1848185029Spjd		}
1849185029Spjd	}
1850185029Spjd
1851185029Spjd	/* dump each stream */
1852185029Spjd	sdd.fromsnap = fromsnap;
1853185029Spjd	sdd.tosnap = tosnap;
1854251646Sdelphij	if (tid != 0)
1855219089Spjd		sdd.outfd = pipefd[0];
1856219089Spjd	else
1857219089Spjd		sdd.outfd = outfd;
1858228103Smm	sdd.replicate = flags->replicate;
1859228103Smm	sdd.doall = flags->doall;
1860228103Smm	sdd.fromorigin = flags->fromorigin;
1861185029Spjd	sdd.fss = fss;
1862185029Spjd	sdd.fsavl = fsavl;
1863228103Smm	sdd.verbose = flags->verbose;
1864228103Smm	sdd.parsable = flags->parsable;
1865235222Smm	sdd.progress = flags->progress;
1866228103Smm	sdd.dryrun = flags->dryrun;
1867276081Sdelphij	sdd.large_block = flags->largeblock;
1868268649Sdelphij	sdd.embed_data = flags->embed_data;
1869219089Spjd	sdd.filter_cb = filter_func;
1870219089Spjd	sdd.filter_cb_arg = cb_arg;
1871219089Spjd	if (debugnvp)
1872219089Spjd		sdd.debugnv = *debugnvp;
1873290755Smav	if (sdd.verbose && sdd.dryrun)
1874290755Smav		sdd.std_out = B_TRUE;
1875290755Smav	fout = sdd.std_out ? stdout : stderr;
1876238422Smm
1877238422Smm	/*
1878238422Smm	 * Some flags require that we place user holds on the datasets that are
1879238422Smm	 * being sent so they don't get destroyed during the send. We can skip
1880238422Smm	 * this step if the pool is imported read-only since the datasets cannot
1881238422Smm	 * be destroyed.
1882238422Smm	 */
1883238422Smm	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1884238422Smm	    ZPOOL_PROP_READONLY, NULL) &&
1885238422Smm	    zfs_spa_version(zhp, &spa_version) == 0 &&
1886238422Smm	    spa_version >= SPA_VERSION_USERREFS &&
1887238422Smm	    (flags->doall || flags->replicate)) {
1888219089Spjd		++holdseq;
1889219089Spjd		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1890219089Spjd		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1891219089Spjd		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1892219089Spjd		if (sdd.cleanup_fd < 0) {
1893219089Spjd			err = errno;
1894219089Spjd			goto stderr_out;
1895219089Spjd		}
1896251646Sdelphij		sdd.snapholds = fnvlist_alloc();
1897219089Spjd	} else {
1898219089Spjd		sdd.cleanup_fd = -1;
1899251646Sdelphij		sdd.snapholds = NULL;
1900219089Spjd	}
1901251646Sdelphij	if (flags->verbose || sdd.snapholds != NULL) {
1902228103Smm		/*
1903228103Smm		 * Do a verbose no-op dry run to get all the verbose output
1904251646Sdelphij		 * or to gather snapshot hold's before generating any data,
1905251646Sdelphij		 * then do a non-verbose real run to generate the streams.
1906228103Smm		 */
1907228103Smm		sdd.dryrun = B_TRUE;
1908228103Smm		err = dump_filesystems(zhp, &sdd);
1909251646Sdelphij
1910251646Sdelphij		if (err != 0)
1911251646Sdelphij			goto stderr_out;
1912251646Sdelphij
1913251646Sdelphij		if (flags->verbose) {
1914251646Sdelphij			if (flags->parsable) {
1915290755Smav				(void) fprintf(fout, "size\t%llu\n",
1916251646Sdelphij				    (longlong_t)sdd.size);
1917251646Sdelphij			} else {
1918251646Sdelphij				char buf[16];
1919251646Sdelphij				zfs_nicenum(sdd.size, buf, sizeof (buf));
1920290755Smav				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1921251646Sdelphij				    "total estimated size is %s\n"), buf);
1922251646Sdelphij			}
1923251646Sdelphij		}
1924251646Sdelphij
1925251646Sdelphij		/* Ensure no snaps found is treated as an error. */
1926251646Sdelphij		if (!sdd.seento) {
1927251646Sdelphij			err = ENOENT;
1928251646Sdelphij			goto err_out;
1929251646Sdelphij		}
1930251646Sdelphij
1931251646Sdelphij		/* Skip the second run if dryrun was requested. */
1932251646Sdelphij		if (flags->dryrun)
1933251646Sdelphij			goto err_out;
1934251646Sdelphij
1935251646Sdelphij		if (sdd.snapholds != NULL) {
1936251646Sdelphij			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1937251646Sdelphij			if (err != 0)
1938251646Sdelphij				goto stderr_out;
1939251646Sdelphij
1940251646Sdelphij			fnvlist_free(sdd.snapholds);
1941251646Sdelphij			sdd.snapholds = NULL;
1942251646Sdelphij		}
1943251646Sdelphij
1944251646Sdelphij		sdd.dryrun = B_FALSE;
1945228103Smm		sdd.verbose = B_FALSE;
1946228103Smm	}
1947251646Sdelphij
1948185029Spjd	err = dump_filesystems(zhp, &sdd);
1949185029Spjd	fsavl_destroy(fsavl);
1950185029Spjd	nvlist_free(fss);
1951185029Spjd
1952251646Sdelphij	/* Ensure no snaps found is treated as an error. */
1953251646Sdelphij	if (err == 0 && !sdd.seento)
1954251646Sdelphij		err = ENOENT;
1955251646Sdelphij
1956251646Sdelphij	if (tid != 0) {
1957251646Sdelphij		if (err != 0)
1958251646Sdelphij			(void) pthread_cancel(tid);
1959253818Ssmh		(void) close(pipefd[0]);
1960251646Sdelphij		(void) pthread_join(tid, NULL);
1961219089Spjd	}
1962219089Spjd
1963219089Spjd	if (sdd.cleanup_fd != -1) {
1964219089Spjd		VERIFY(0 == close(sdd.cleanup_fd));
1965219089Spjd		sdd.cleanup_fd = -1;
1966219089Spjd	}
1967219089Spjd
1968228103Smm	if (!flags->dryrun && (flags->replicate || flags->doall ||
1969228103Smm	    flags->props)) {
1970185029Spjd		/*
1971185029Spjd		 * write final end record.  NB: want to do this even if
1972185029Spjd		 * there was some error, because it might not be totally
1973185029Spjd		 * failed.
1974185029Spjd		 */
1975185029Spjd		dmu_replay_record_t drr = { 0 };
1976185029Spjd		drr.drr_type = DRR_END;
1977185029Spjd		if (write(outfd, &drr, sizeof (drr)) == -1) {
1978185029Spjd			return (zfs_standard_error(zhp->zfs_hdl,
1979185029Spjd			    errno, errbuf));
1980185029Spjd		}
1981185029Spjd	}
1982185029Spjd
1983185029Spjd	return (err || sdd.err);
1984219089Spjd
1985219089Spjdstderr_out:
1986219089Spjd	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1987219089Spjderr_out:
1988251646Sdelphij	fsavl_destroy(fsavl);
1989251646Sdelphij	nvlist_free(fss);
1990251646Sdelphij	fnvlist_free(sdd.snapholds);
1991251646Sdelphij
1992219089Spjd	if (sdd.cleanup_fd != -1)
1993219089Spjd		VERIFY(0 == close(sdd.cleanup_fd));
1994251646Sdelphij	if (tid != 0) {
1995219089Spjd		(void) pthread_cancel(tid);
1996253818Ssmh		(void) close(pipefd[0]);
1997219089Spjd		(void) pthread_join(tid, NULL);
1998219089Spjd	}
1999219089Spjd	return (err);
2000185029Spjd}
2001185029Spjd
2002263407Sdelphijint
2003268649Sdelphijzfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
2004268649Sdelphij    enum lzc_send_flags flags)
2005263407Sdelphij{
2006263407Sdelphij	int err;
2007263407Sdelphij	libzfs_handle_t *hdl = zhp->zfs_hdl;
2008263407Sdelphij
2009263407Sdelphij	char errbuf[1024];
2010263407Sdelphij	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2011263407Sdelphij	    "warning: cannot send '%s'"), zhp->zfs_name);
2012263407Sdelphij
2013268649Sdelphij	err = lzc_send(zhp->zfs_name, from, fd, flags);
2014263407Sdelphij	if (err != 0) {
2015263407Sdelphij		switch (errno) {
2016263407Sdelphij		case EXDEV:
2017263407Sdelphij			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2018263407Sdelphij			    "not an earlier snapshot from the same fs"));
2019263407Sdelphij			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2020263407Sdelphij
2021263407Sdelphij		case ENOENT:
2022263407Sdelphij		case ESRCH:
2023263407Sdelphij			if (lzc_exists(zhp->zfs_name)) {
2024263407Sdelphij				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2025263407Sdelphij				    "incremental source (%s) does not exist"),
2026263407Sdelphij				    from);
2027263407Sdelphij			}
2028263407Sdelphij			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2029263407Sdelphij
2030263407Sdelphij		case EBUSY:
2031263407Sdelphij			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2032263407Sdelphij			    "target is busy; if a filesystem, "
2033263407Sdelphij			    "it must not be mounted"));
2034263407Sdelphij			return (zfs_error(hdl, EZFS_BUSY, errbuf));
2035263407Sdelphij
2036263407Sdelphij		case EDQUOT:
2037263407Sdelphij		case EFBIG:
2038263407Sdelphij		case EIO:
2039263407Sdelphij		case ENOLINK:
2040263407Sdelphij		case ENOSPC:
2041263407Sdelphij#ifdef illumos
2042263407Sdelphij		case ENOSTR:
2043263407Sdelphij#endif
2044263407Sdelphij		case ENXIO:
2045263407Sdelphij		case EPIPE:
2046263407Sdelphij		case ERANGE:
2047263407Sdelphij		case EFAULT:
2048263407Sdelphij		case EROFS:
2049263407Sdelphij			zfs_error_aux(hdl, strerror(errno));
2050263407Sdelphij			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2051263407Sdelphij
2052263407Sdelphij		default:
2053263407Sdelphij			return (zfs_standard_error(hdl, errno, errbuf));
2054263407Sdelphij		}
2055263407Sdelphij	}
2056263407Sdelphij	return (err != 0);
2057263407Sdelphij}
2058263407Sdelphij
2059185029Spjd/*
2060185029Spjd * Routines specific to "zfs recv"
2061185029Spjd */
2062185029Spjd
2063185029Spjdstatic int
2064185029Spjdrecv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2065185029Spjd    boolean_t byteswap, zio_cksum_t *zc)
2066185029Spjd{
2067185029Spjd	char *cp = buf;
2068185029Spjd	int rv;
2069185029Spjd	int len = ilen;
2070185029Spjd
2071288553Smav	assert(ilen <= SPA_MAXBLOCKSIZE);
2072288553Smav
2073185029Spjd	do {
2074185029Spjd		rv = read(fd, cp, len);
2075185029Spjd		cp += rv;
2076185029Spjd		len -= rv;
2077185029Spjd	} while (rv > 0);
2078185029Spjd
2079185029Spjd	if (rv < 0 || len != 0) {
2080185029Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2081185029Spjd		    "failed to read from stream"));
2082185029Spjd		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2083185029Spjd		    "cannot receive")));
2084185029Spjd	}
2085185029Spjd
2086185029Spjd	if (zc) {
2087185029Spjd		if (byteswap)
2088185029Spjd			fletcher_4_incremental_byteswap(buf, ilen, zc);
2089185029Spjd		else
2090185029Spjd			fletcher_4_incremental_native(buf, ilen, zc);
2091185029Spjd	}
2092185029Spjd	return (0);
2093185029Spjd}
2094185029Spjd
2095185029Spjdstatic int
2096185029Spjdrecv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2097185029Spjd    boolean_t byteswap, zio_cksum_t *zc)
2098185029Spjd{
2099185029Spjd	char *buf;
2100185029Spjd	int err;
2101185029Spjd
2102185029Spjd	buf = zfs_alloc(hdl, len);
2103185029Spjd	if (buf == NULL)
2104185029Spjd		return (ENOMEM);
2105185029Spjd
2106185029Spjd	err = recv_read(hdl, fd, buf, len, byteswap, zc);
2107185029Spjd	if (err != 0) {
2108185029Spjd		free(buf);
2109185029Spjd		return (err);
2110185029Spjd	}
2111185029Spjd
2112185029Spjd	err = nvlist_unpack(buf, len, nvp, 0);
2113185029Spjd	free(buf);
2114185029Spjd	if (err != 0) {
2115185029Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2116185029Spjd		    "stream (malformed nvlist)"));
2117185029Spjd		return (EINVAL);
2118185029Spjd	}
2119185029Spjd	return (0);
2120185029Spjd}
2121185029Spjd
2122185029Spjdstatic int
2123185029Spjdrecv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2124228103Smm    int baselen, char *newname, recvflags_t *flags)
2125185029Spjd{
2126185029Spjd	static int seq;
2127185029Spjd	zfs_cmd_t zc = { 0 };
2128185029Spjd	int err;
2129185029Spjd	prop_changelist_t *clp;
2130185029Spjd	zfs_handle_t *zhp;
2131185029Spjd
2132185029Spjd	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2133185029Spjd	if (zhp == NULL)
2134185029Spjd		return (-1);
2135185029Spjd	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2136228103Smm	    flags->force ? MS_FORCE : 0);
2137185029Spjd	zfs_close(zhp);
2138185029Spjd	if (clp == NULL)
2139185029Spjd		return (-1);
2140185029Spjd	err = changelist_prefix(clp);
2141185029Spjd	if (err)
2142185029Spjd		return (err);
2143185029Spjd
2144219089Spjd	zc.zc_objset_type = DMU_OST_ZFS;
2145219089Spjd	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2146219089Spjd
2147185029Spjd	if (tryname) {
2148185029Spjd		(void) strcpy(newname, tryname);
2149185029Spjd
2150185029Spjd		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
2151185029Spjd
2152228103Smm		if (flags->verbose) {
2153185029Spjd			(void) printf("attempting rename %s to %s\n",
2154185029Spjd			    zc.zc_name, zc.zc_value);
2155185029Spjd		}
2156185029Spjd		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2157185029Spjd		if (err == 0)
2158185029Spjd			changelist_rename(clp, name, tryname);
2159185029Spjd	} else {
2160185029Spjd		err = ENOENT;
2161185029Spjd	}
2162185029Spjd
2163248571Smm	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2164185029Spjd		seq++;
2165185029Spjd
2166307122Smav		(void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2167307122Smav		    "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2168185029Spjd		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
2169185029Spjd
2170228103Smm		if (flags->verbose) {
2171185029Spjd			(void) printf("failed - trying rename %s to %s\n",
2172185029Spjd			    zc.zc_name, zc.zc_value);
2173185029Spjd		}
2174185029Spjd		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2175185029Spjd		if (err == 0)
2176185029Spjd			changelist_rename(clp, name, newname);
2177228103Smm		if (err && flags->verbose) {
2178185029Spjd			(void) printf("failed (%u) - "
2179185029Spjd			    "will try again on next pass\n", errno);
2180185029Spjd		}
2181185029Spjd		err = EAGAIN;
2182228103Smm	} else if (flags->verbose) {
2183185029Spjd		if (err == 0)
2184185029Spjd			(void) printf("success\n");
2185185029Spjd		else
2186185029Spjd			(void) printf("failed (%u)\n", errno);
2187185029Spjd	}
2188185029Spjd
2189185029Spjd	(void) changelist_postfix(clp);
2190185029Spjd	changelist_free(clp);
2191185029Spjd
2192185029Spjd	return (err);
2193185029Spjd}
2194185029Spjd
2195185029Spjdstatic int
2196185029Spjdrecv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2197228103Smm    char *newname, recvflags_t *flags)
2198185029Spjd{
2199185029Spjd	zfs_cmd_t zc = { 0 };
2200185029Spjd	int err = 0;
2201185029Spjd	prop_changelist_t *clp;
2202185029Spjd	zfs_handle_t *zhp;
2203219089Spjd	boolean_t defer = B_FALSE;
2204219089Spjd	int spa_version;
2205185029Spjd
2206185029Spjd	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2207185029Spjd	if (zhp == NULL)
2208185029Spjd		return (-1);
2209185029Spjd	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2210228103Smm	    flags->force ? MS_FORCE : 0);
2211219089Spjd	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2212219089Spjd	    zfs_spa_version(zhp, &spa_version) == 0 &&
2213219089Spjd	    spa_version >= SPA_VERSION_USERREFS)
2214219089Spjd		defer = B_TRUE;
2215185029Spjd	zfs_close(zhp);
2216185029Spjd	if (clp == NULL)
2217185029Spjd		return (-1);
2218185029Spjd	err = changelist_prefix(clp);
2219185029Spjd	if (err)
2220185029Spjd		return (err);
2221185029Spjd
2222185029Spjd	zc.zc_objset_type = DMU_OST_ZFS;
2223219089Spjd	zc.zc_defer_destroy = defer;
2224185029Spjd	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2225185029Spjd
2226228103Smm	if (flags->verbose)
2227185029Spjd		(void) printf("attempting destroy %s\n", zc.zc_name);
2228185029Spjd	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
2229185029Spjd	if (err == 0) {
2230228103Smm		if (flags->verbose)
2231185029Spjd			(void) printf("success\n");
2232185029Spjd		changelist_remove(clp, zc.zc_name);
2233185029Spjd	}
2234185029Spjd
2235185029Spjd	(void) changelist_postfix(clp);
2236185029Spjd	changelist_free(clp);
2237185029Spjd
2238219089Spjd	/*
2239219089Spjd	 * Deferred destroy might destroy the snapshot or only mark it to be
2240219089Spjd	 * destroyed later, and it returns success in either case.
2241219089Spjd	 */
2242219089Spjd	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2243219089Spjd	    ZFS_TYPE_SNAPSHOT))) {
2244185029Spjd		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2245219089Spjd	}
2246185029Spjd
2247185029Spjd	return (err);
2248185029Spjd}
2249185029Spjd
2250185029Spjdtypedef struct guid_to_name_data {
2251185029Spjd	uint64_t guid;
2252290756Smav	boolean_t bookmark_ok;
2253185029Spjd	char *name;
2254228103Smm	char *skip;
2255185029Spjd} guid_to_name_data_t;
2256185029Spjd
2257185029Spjdstatic int
2258185029Spjdguid_to_name_cb(zfs_handle_t *zhp, void *arg)
2259185029Spjd{
2260185029Spjd	guid_to_name_data_t *gtnd = arg;
2261290756Smav	const char *slash;
2262185029Spjd	int err;
2263185029Spjd
2264228103Smm	if (gtnd->skip != NULL &&
2265290756Smav	    (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2266290756Smav	    strcmp(slash + 1, gtnd->skip) == 0) {
2267290756Smav		zfs_close(zhp);
2268228103Smm		return (0);
2269228103Smm	}
2270228103Smm
2271290756Smav	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
2272185029Spjd		(void) strcpy(gtnd->name, zhp->zfs_name);
2273219089Spjd		zfs_close(zhp);
2274185029Spjd		return (EEXIST);
2275185029Spjd	}
2276228103Smm
2277185029Spjd	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2278290756Smav	if (err != EEXIST && gtnd->bookmark_ok)
2279290756Smav		err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2280185029Spjd	zfs_close(zhp);
2281185029Spjd	return (err);
2282185029Spjd}
2283185029Spjd
2284228103Smm/*
2285228103Smm * Attempt to find the local dataset associated with this guid.  In the case of
2286228103Smm * multiple matches, we attempt to find the "best" match by searching
2287228103Smm * progressively larger portions of the hierarchy.  This allows one to send a
2288228103Smm * tree of datasets individually and guarantee that we will find the source
2289228103Smm * guid within that hierarchy, even if there are multiple matches elsewhere.
2290228103Smm */
2291185029Spjdstatic int
2292185029Spjdguid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
2293290756Smav    boolean_t bookmark_ok, char *name)
2294185029Spjd{
2295307122Smav	char pname[ZFS_MAX_DATASET_NAME_LEN];
2296185029Spjd	guid_to_name_data_t gtnd;
2297185029Spjd
2298185029Spjd	gtnd.guid = guid;
2299290756Smav	gtnd.bookmark_ok = bookmark_ok;
2300185029Spjd	gtnd.name = name;
2301228103Smm	gtnd.skip = NULL;
2302185029Spjd
2303228103Smm	/*
2304290756Smav	 * Search progressively larger portions of the hierarchy, starting
2305290756Smav	 * with the filesystem specified by 'parent'.  This will
2306228103Smm	 * select the "most local" version of the origin snapshot in the case
2307228103Smm	 * that there are multiple matching snapshots in the system.
2308228103Smm	 */
2309290756Smav	(void) strlcpy(pname, parent, sizeof (pname));
2310290756Smav	char *cp = strrchr(pname, '@');
2311290756Smav	if (cp == NULL)
2312290756Smav		cp = strchr(pname, '\0');
2313290756Smav	for (; cp != NULL; cp = strrchr(pname, '/')) {
2314228103Smm		/* Chop off the last component and open the parent */
2315185029Spjd		*cp = '\0';
2316290756Smav		zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
2317185029Spjd
2318228103Smm		if (zhp == NULL)
2319228103Smm			continue;
2320290756Smav		int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
2321290756Smav		if (err != EEXIST)
2322290756Smav			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
2323290756Smav		if (err != EEXIST && bookmark_ok)
2324290756Smav			err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
2325185029Spjd		zfs_close(zhp);
2326228103Smm		if (err == EEXIST)
2327228103Smm			return (0);
2328228103Smm
2329228103Smm		/*
2330290756Smav		 * Remember the last portion of the dataset so we skip it next
2331290756Smav		 * time through (as we've already searched that portion of the
2332290756Smav		 * hierarchy).
2333228103Smm		 */
2334290756Smav		gtnd.skip = strrchr(pname, '/') + 1;
2335185029Spjd	}
2336185029Spjd
2337228103Smm	return (ENOENT);
2338185029Spjd}
2339185029Spjd
2340185029Spjd/*
2341228103Smm * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
2342228103Smm * guid1 is after guid2.
2343185029Spjd */
2344185029Spjdstatic int
2345185029Spjdcreated_before(libzfs_handle_t *hdl, avl_tree_t *avl,
2346185029Spjd    uint64_t guid1, uint64_t guid2)
2347185029Spjd{
2348185029Spjd	nvlist_t *nvfs;
2349185029Spjd	char *fsname, *snapname;
2350307122Smav	char buf[ZFS_MAX_DATASET_NAME_LEN];
2351185029Spjd	int rv;
2352228103Smm	zfs_handle_t *guid1hdl, *guid2hdl;
2353228103Smm	uint64_t create1, create2;
2354185029Spjd
2355185029Spjd	if (guid2 == 0)
2356185029Spjd		return (0);
2357185029Spjd	if (guid1 == 0)
2358185029Spjd		return (1);
2359185029Spjd
2360185029Spjd	nvfs = fsavl_find(avl, guid1, &snapname);
2361185029Spjd	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2362185029Spjd	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2363228103Smm	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2364228103Smm	if (guid1hdl == NULL)
2365185029Spjd		return (-1);
2366185029Spjd
2367185029Spjd	nvfs = fsavl_find(avl, guid2, &snapname);
2368185029Spjd	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2369185029Spjd	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2370228103Smm	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2371228103Smm	if (guid2hdl == NULL) {
2372228103Smm		zfs_close(guid1hdl);
2373185029Spjd		return (-1);
2374185029Spjd	}
2375185029Spjd
2376228103Smm	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2377228103Smm	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2378185029Spjd
2379228103Smm	if (create1 < create2)
2380228103Smm		rv = -1;
2381228103Smm	else if (create1 > create2)
2382228103Smm		rv = +1;
2383228103Smm	else
2384228103Smm		rv = 0;
2385185029Spjd
2386228103Smm	zfs_close(guid1hdl);
2387228103Smm	zfs_close(guid2hdl);
2388228103Smm
2389185029Spjd	return (rv);
2390185029Spjd}
2391185029Spjd
2392185029Spjdstatic int
2393185029Spjdrecv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2394228103Smm    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2395219089Spjd    nvlist_t *renamed)
2396185029Spjd{
2397244194Ssmh	nvlist_t *local_nv, *deleted = NULL;
2398185029Spjd	avl_tree_t *local_avl;
2399185029Spjd	nvpair_t *fselem, *nextfselem;
2400219089Spjd	char *fromsnap;
2401307122Smav	char newname[ZFS_MAX_DATASET_NAME_LEN];
2402244194Ssmh	char guidname[32];
2403185029Spjd	int error;
2404219089Spjd	boolean_t needagain, progress, recursive;
2405209962Smm	char *s1, *s2;
2406185029Spjd
2407185029Spjd	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2408185029Spjd
2409219089Spjd	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2410219089Spjd	    ENOENT);
2411219089Spjd
2412228103Smm	if (flags->dryrun)
2413185029Spjd		return (0);
2414185029Spjd
2415185029Spjdagain:
2416185029Spjd	needagain = progress = B_FALSE;
2417185029Spjd
2418244194Ssmh	VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2419244194Ssmh
2420185029Spjd	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2421307117Smav	    recursive, B_FALSE, &local_nv, &local_avl)) != 0)
2422185029Spjd		return (error);
2423185029Spjd
2424185029Spjd	/*
2425185029Spjd	 * Process deletes and renames
2426185029Spjd	 */
2427185029Spjd	for (fselem = nvlist_next_nvpair(local_nv, NULL);
2428185029Spjd	    fselem; fselem = nextfselem) {
2429185029Spjd		nvlist_t *nvfs, *snaps;
2430185029Spjd		nvlist_t *stream_nvfs = NULL;
2431185029Spjd		nvpair_t *snapelem, *nextsnapelem;
2432185029Spjd		uint64_t fromguid = 0;
2433185029Spjd		uint64_t originguid = 0;
2434185029Spjd		uint64_t stream_originguid = 0;
2435185029Spjd		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2436219089Spjd		char *fsname, *stream_fsname;
2437185029Spjd
2438185029Spjd		nextfselem = nvlist_next_nvpair(local_nv, fselem);
2439185029Spjd
2440185029Spjd		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2441185029Spjd		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2442185029Spjd		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2443185029Spjd		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2444185029Spjd		    &parent_fromsnap_guid));
2445185029Spjd		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2446185029Spjd
2447185029Spjd		/*
2448185029Spjd		 * First find the stream's fs, so we can check for
2449185029Spjd		 * a different origin (due to "zfs promote")
2450185029Spjd		 */
2451185029Spjd		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2452185029Spjd		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2453185029Spjd			uint64_t thisguid;
2454185029Spjd
2455185029Spjd			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2456185029Spjd			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2457185029Spjd
2458185029Spjd			if (stream_nvfs != NULL)
2459185029Spjd				break;
2460185029Spjd		}
2461185029Spjd
2462185029Spjd		/* check for promote */
2463185029Spjd		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
2464185029Spjd		    &stream_originguid);
2465185029Spjd		if (stream_nvfs && originguid != stream_originguid) {
2466185029Spjd			switch (created_before(hdl, local_avl,
2467185029Spjd			    stream_originguid, originguid)) {
2468185029Spjd			case 1: {
2469185029Spjd				/* promote it! */
2470185029Spjd				zfs_cmd_t zc = { 0 };
2471185029Spjd				nvlist_t *origin_nvfs;
2472185029Spjd				char *origin_fsname;
2473185029Spjd
2474228103Smm				if (flags->verbose)
2475185029Spjd					(void) printf("promoting %s\n", fsname);
2476185029Spjd
2477185029Spjd				origin_nvfs = fsavl_find(local_avl, originguid,
2478185029Spjd				    NULL);
2479185029Spjd				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2480185029Spjd				    "name", &origin_fsname));
2481185029Spjd				(void) strlcpy(zc.zc_value, origin_fsname,
2482185029Spjd				    sizeof (zc.zc_value));
2483185029Spjd				(void) strlcpy(zc.zc_name, fsname,
2484185029Spjd				    sizeof (zc.zc_name));
2485185029Spjd				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2486185029Spjd				if (error == 0)
2487185029Spjd					progress = B_TRUE;
2488185029Spjd				break;
2489185029Spjd			}
2490185029Spjd			default:
2491185029Spjd				break;
2492185029Spjd			case -1:
2493185029Spjd				fsavl_destroy(local_avl);
2494185029Spjd				nvlist_free(local_nv);
2495185029Spjd				return (-1);
2496185029Spjd			}
2497185029Spjd			/*
2498185029Spjd			 * We had/have the wrong origin, therefore our
2499185029Spjd			 * list of snapshots is wrong.  Need to handle
2500185029Spjd			 * them on the next pass.
2501185029Spjd			 */
2502185029Spjd			needagain = B_TRUE;
2503185029Spjd			continue;
2504185029Spjd		}
2505185029Spjd
2506185029Spjd		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2507185029Spjd		    snapelem; snapelem = nextsnapelem) {
2508185029Spjd			uint64_t thisguid;
2509185029Spjd			char *stream_snapname;
2510185029Spjd			nvlist_t *found, *props;
2511185029Spjd
2512185029Spjd			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2513185029Spjd
2514185029Spjd			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2515185029Spjd			found = fsavl_find(stream_avl, thisguid,
2516185029Spjd			    &stream_snapname);
2517185029Spjd
2518185029Spjd			/* check for delete */
2519185029Spjd			if (found == NULL) {
2520307122Smav				char name[ZFS_MAX_DATASET_NAME_LEN];
2521185029Spjd
2522228103Smm				if (!flags->force)
2523185029Spjd					continue;
2524185029Spjd
2525185029Spjd				(void) snprintf(name, sizeof (name), "%s@%s",
2526185029Spjd				    fsname, nvpair_name(snapelem));
2527185029Spjd
2528185029Spjd				error = recv_destroy(hdl, name,
2529185029Spjd				    strlen(fsname)+1, newname, flags);
2530185029Spjd				if (error)
2531185029Spjd					needagain = B_TRUE;
2532185029Spjd				else
2533185029Spjd					progress = B_TRUE;
2534244194Ssmh				sprintf(guidname, "%lu", thisguid);
2535244194Ssmh				nvlist_add_boolean(deleted, guidname);
2536185029Spjd				continue;
2537185029Spjd			}
2538185029Spjd
2539185029Spjd			stream_nvfs = found;
2540185029Spjd
2541185029Spjd			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2542185029Spjd			    &props) && 0 == nvlist_lookup_nvlist(props,
2543185029Spjd			    stream_snapname, &props)) {
2544185029Spjd				zfs_cmd_t zc = { 0 };
2545185029Spjd
2546219089Spjd				zc.zc_cookie = B_TRUE; /* received */
2547185029Spjd				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2548185029Spjd				    "%s@%s", fsname, nvpair_name(snapelem));
2549185029Spjd				if (zcmd_write_src_nvlist(hdl, &zc,
2550185029Spjd				    props) == 0) {
2551185029Spjd					(void) zfs_ioctl(hdl,
2552185029Spjd					    ZFS_IOC_SET_PROP, &zc);
2553185029Spjd					zcmd_free_nvlists(&zc);
2554185029Spjd				}
2555185029Spjd			}
2556185029Spjd
2557185029Spjd			/* check for different snapname */
2558185029Spjd			if (strcmp(nvpair_name(snapelem),
2559185029Spjd			    stream_snapname) != 0) {
2560307122Smav				char name[ZFS_MAX_DATASET_NAME_LEN];
2561307122Smav				char tryname[ZFS_MAX_DATASET_NAME_LEN];
2562185029Spjd
2563185029Spjd				(void) snprintf(name, sizeof (name), "%s@%s",
2564185029Spjd				    fsname, nvpair_name(snapelem));
2565185029Spjd				(void) snprintf(tryname, sizeof (name), "%s@%s",
2566185029Spjd				    fsname, stream_snapname);
2567185029Spjd
2568185029Spjd				error = recv_rename(hdl, name, tryname,
2569185029Spjd				    strlen(fsname)+1, newname, flags);
2570185029Spjd				if (error)
2571185029Spjd					needagain = B_TRUE;
2572185029Spjd				else
2573185029Spjd					progress = B_TRUE;
2574185029Spjd			}
2575185029Spjd
2576185029Spjd			if (strcmp(stream_snapname, fromsnap) == 0)
2577185029Spjd				fromguid = thisguid;
2578185029Spjd		}
2579185029Spjd
2580185029Spjd		/* check for delete */
2581185029Spjd		if (stream_nvfs == NULL) {
2582228103Smm			if (!flags->force)
2583185029Spjd				continue;
2584185029Spjd
2585185029Spjd			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2586185029Spjd			    newname, flags);
2587185029Spjd			if (error)
2588185029Spjd				needagain = B_TRUE;
2589185029Spjd			else
2590185029Spjd				progress = B_TRUE;
2591244194Ssmh			sprintf(guidname, "%lu", parent_fromsnap_guid);
2592244194Ssmh			nvlist_add_boolean(deleted, guidname);
2593185029Spjd			continue;
2594185029Spjd		}
2595185029Spjd
2596219089Spjd		if (fromguid == 0) {
2597228103Smm			if (flags->verbose) {
2598219089Spjd				(void) printf("local fs %s does not have "
2599219089Spjd				    "fromsnap (%s in stream); must have "
2600219089Spjd				    "been deleted locally; ignoring\n",
2601219089Spjd				    fsname, fromsnap);
2602219089Spjd			}
2603185029Spjd			continue;
2604185029Spjd		}
2605185029Spjd
2606185029Spjd		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2607185029Spjd		    "name", &stream_fsname));
2608185029Spjd		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2609185029Spjd		    "parentfromsnap", &stream_parent_fromsnap_guid));
2610185029Spjd
2611209962Smm		s1 = strrchr(fsname, '/');
2612209962Smm		s2 = strrchr(stream_fsname, '/');
2613209962Smm
2614219089Spjd		/*
2615244194Ssmh		 * Check if we're going to rename based on parent guid change
2616244194Ssmh		 * and the current parent guid was also deleted. If it was then
2617244194Ssmh		 * rename will fail and is likely unneeded, so avoid this and
2618244194Ssmh		 * force an early retry to determine the new
2619244194Ssmh		 * parent_fromsnap_guid.
2620244194Ssmh		 */
2621244194Ssmh		if (stream_parent_fromsnap_guid != 0 &&
2622244194Ssmh                    parent_fromsnap_guid != 0 &&
2623244194Ssmh                    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2624244194Ssmh			sprintf(guidname, "%lu", parent_fromsnap_guid);
2625244194Ssmh			if (nvlist_exists(deleted, guidname)) {
2626244194Ssmh				progress = B_TRUE;
2627244194Ssmh				needagain = B_TRUE;
2628244194Ssmh				goto doagain;
2629244194Ssmh			}
2630244194Ssmh		}
2631244194Ssmh
2632244194Ssmh		/*
2633219089Spjd		 * Check for rename. If the exact receive path is specified, it
2634219089Spjd		 * does not count as a rename, but we still need to check the
2635219089Spjd		 * datasets beneath it.
2636219089Spjd		 */
2637185029Spjd		if ((stream_parent_fromsnap_guid != 0 &&
2638219089Spjd		    parent_fromsnap_guid != 0 &&
2639185029Spjd		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2640228103Smm		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2641219089Spjd		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2642185029Spjd			nvlist_t *parent;
2643307122Smav			char tryname[ZFS_MAX_DATASET_NAME_LEN];
2644185029Spjd
2645185029Spjd			parent = fsavl_find(local_avl,
2646185029Spjd			    stream_parent_fromsnap_guid, NULL);
2647185029Spjd			/*
2648185029Spjd			 * NB: parent might not be found if we used the
2649185029Spjd			 * tosnap for stream_parent_fromsnap_guid,
2650185029Spjd			 * because the parent is a newly-created fs;
2651185029Spjd			 * we'll be able to rename it after we recv the
2652185029Spjd			 * new fs.
2653185029Spjd			 */
2654185029Spjd			if (parent != NULL) {
2655185029Spjd				char *pname;
2656185029Spjd
2657185029Spjd				VERIFY(0 == nvlist_lookup_string(parent, "name",
2658185029Spjd				    &pname));
2659185029Spjd				(void) snprintf(tryname, sizeof (tryname),
2660219089Spjd				    "%s%s", pname, strrchr(stream_fsname, '/'));
2661185029Spjd			} else {
2662185029Spjd				tryname[0] = '\0';
2663228103Smm				if (flags->verbose) {
2664185029Spjd					(void) printf("local fs %s new parent "
2665185029Spjd					    "not found\n", fsname);
2666185029Spjd				}
2667185029Spjd			}
2668185029Spjd
2669219089Spjd			newname[0] = '\0';
2670219089Spjd
2671185029Spjd			error = recv_rename(hdl, fsname, tryname,
2672185029Spjd			    strlen(tofs)+1, newname, flags);
2673219089Spjd
2674219089Spjd			if (renamed != NULL && newname[0] != '\0') {
2675219089Spjd				VERIFY(0 == nvlist_add_boolean(renamed,
2676219089Spjd				    newname));
2677219089Spjd			}
2678219089Spjd
2679185029Spjd			if (error)
2680185029Spjd				needagain = B_TRUE;
2681185029Spjd			else
2682185029Spjd				progress = B_TRUE;
2683185029Spjd		}
2684185029Spjd	}
2685185029Spjd
2686244194Ssmhdoagain:
2687185029Spjd	fsavl_destroy(local_avl);
2688185029Spjd	nvlist_free(local_nv);
2689244194Ssmh	nvlist_free(deleted);
2690185029Spjd
2691185029Spjd	if (needagain && progress) {
2692185029Spjd		/* do another pass to fix up temporary names */
2693228103Smm		if (flags->verbose)
2694185029Spjd			(void) printf("another pass:\n");
2695185029Spjd		goto again;
2696185029Spjd	}
2697185029Spjd
2698185029Spjd	return (needagain);
2699185029Spjd}
2700185029Spjd
2701185029Spjdstatic int
2702185029Spjdzfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2703228103Smm    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2704219089Spjd    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2705185029Spjd{
2706185029Spjd	nvlist_t *stream_nv = NULL;
2707185029Spjd	avl_tree_t *stream_avl = NULL;
2708185029Spjd	char *fromsnap = NULL;
2709297103Smav	char *sendsnap = NULL;
2710219089Spjd	char *cp;
2711307122Smav	char tofs[ZFS_MAX_DATASET_NAME_LEN];
2712307122Smav	char sendfs[ZFS_MAX_DATASET_NAME_LEN];
2713185029Spjd	char errbuf[1024];
2714185029Spjd	dmu_replay_record_t drre;
2715185029Spjd	int error;
2716185029Spjd	boolean_t anyerr = B_FALSE;
2717185029Spjd	boolean_t softerr = B_FALSE;
2718219089Spjd	boolean_t recursive;
2719185029Spjd
2720185029Spjd	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2721185029Spjd	    "cannot receive"));
2722185029Spjd
2723185029Spjd	assert(drr->drr_type == DRR_BEGIN);
2724185029Spjd	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2725219089Spjd	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2726219089Spjd	    DMU_COMPOUNDSTREAM);
2727185029Spjd
2728185029Spjd	/*
2729185029Spjd	 * Read in the nvlist from the stream.
2730185029Spjd	 */
2731185029Spjd	if (drr->drr_payloadlen != 0) {
2732185029Spjd		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2733228103Smm		    &stream_nv, flags->byteswap, zc);
2734185029Spjd		if (error) {
2735185029Spjd			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2736185029Spjd			goto out;
2737185029Spjd		}
2738185029Spjd	}
2739185029Spjd
2740219089Spjd	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2741219089Spjd	    ENOENT);
2742219089Spjd
2743219089Spjd	if (recursive && strchr(destname, '@')) {
2744219089Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2745219089Spjd		    "cannot specify snapshot name for multi-snapshot stream"));
2746219089Spjd		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2747219089Spjd		goto out;
2748219089Spjd	}
2749219089Spjd
2750185029Spjd	/*
2751185029Spjd	 * Read in the end record and verify checksum.
2752185029Spjd	 */
2753185029Spjd	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2754228103Smm	    flags->byteswap, NULL)))
2755185029Spjd		goto out;
2756228103Smm	if (flags->byteswap) {
2757185029Spjd		drre.drr_type = BSWAP_32(drre.drr_type);
2758185029Spjd		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2759185029Spjd		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2760185029Spjd		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2761185029Spjd		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2762185029Spjd		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2763185029Spjd		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2764185029Spjd		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2765185029Spjd		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2766185029Spjd	}
2767185029Spjd	if (drre.drr_type != DRR_END) {
2768185029Spjd		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2769185029Spjd		goto out;
2770185029Spjd	}
2771185029Spjd	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2772185029Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2773185029Spjd		    "incorrect header checksum"));
2774185029Spjd		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2775185029Spjd		goto out;
2776185029Spjd	}
2777185029Spjd
2778185029Spjd	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2779185029Spjd
2780185029Spjd	if (drr->drr_payloadlen != 0) {
2781185029Spjd		nvlist_t *stream_fss;
2782185029Spjd
2783185029Spjd		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2784185029Spjd		    &stream_fss));
2785185029Spjd		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2786185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2787185029Spjd			    "couldn't allocate avl tree"));
2788185029Spjd			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2789185029Spjd			goto out;
2790185029Spjd		}
2791185029Spjd
2792185029Spjd		if (fromsnap != NULL) {
2793219089Spjd			nvlist_t *renamed = NULL;
2794219089Spjd			nvpair_t *pair = NULL;
2795219089Spjd
2796307122Smav			(void) strlcpy(tofs, destname, sizeof (tofs));
2797228103Smm			if (flags->isprefix) {
2798219089Spjd				struct drr_begin *drrb = &drr->drr_u.drr_begin;
2799219089Spjd				int i;
2800219089Spjd
2801228103Smm				if (flags->istail) {
2802219089Spjd					cp = strrchr(drrb->drr_toname, '/');
2803219089Spjd					if (cp == NULL) {
2804219089Spjd						(void) strlcat(tofs, "/",
2805307122Smav						    sizeof (tofs));
2806219089Spjd						i = 0;
2807219089Spjd					} else {
2808219089Spjd						i = (cp - drrb->drr_toname);
2809219089Spjd					}
2810219089Spjd				} else {
2811219089Spjd					i = strcspn(drrb->drr_toname, "/@");
2812219089Spjd				}
2813185029Spjd				/* zfs_receive_one() will create_parents() */
2814219089Spjd				(void) strlcat(tofs, &drrb->drr_toname[i],
2815307122Smav				    sizeof (tofs));
2816185029Spjd				*strchr(tofs, '@') = '\0';
2817185029Spjd			}
2818219089Spjd
2819228103Smm			if (recursive && !flags->dryrun && !flags->nomount) {
2820219089Spjd				VERIFY(0 == nvlist_alloc(&renamed,
2821219089Spjd				    NV_UNIQUE_NAME, 0));
2822219089Spjd			}
2823219089Spjd
2824219089Spjd			softerr = recv_incremental_replication(hdl, tofs, flags,
2825219089Spjd			    stream_nv, stream_avl, renamed);
2826219089Spjd
2827219089Spjd			/* Unmount renamed filesystems before receiving. */
2828219089Spjd			while ((pair = nvlist_next_nvpair(renamed,
2829219089Spjd			    pair)) != NULL) {
2830219089Spjd				zfs_handle_t *zhp;
2831219089Spjd				prop_changelist_t *clp = NULL;
2832219089Spjd
2833219089Spjd				zhp = zfs_open(hdl, nvpair_name(pair),
2834219089Spjd				    ZFS_TYPE_FILESYSTEM);
2835219089Spjd				if (zhp != NULL) {
2836219089Spjd					clp = changelist_gather(zhp,
2837219089Spjd					    ZFS_PROP_MOUNTPOINT, 0, 0);
2838219089Spjd					zfs_close(zhp);
2839219089Spjd					if (clp != NULL) {
2840219089Spjd						softerr |=
2841219089Spjd						    changelist_prefix(clp);
2842219089Spjd						changelist_free(clp);
2843219089Spjd					}
2844219089Spjd				}
2845219089Spjd			}
2846219089Spjd
2847219089Spjd			nvlist_free(renamed);
2848185029Spjd		}
2849185029Spjd	}
2850185029Spjd
2851219089Spjd	/*
2852219089Spjd	 * Get the fs specified by the first path in the stream (the top level
2853219089Spjd	 * specified by 'zfs send') and pass it to each invocation of
2854219089Spjd	 * zfs_receive_one().
2855219089Spjd	 */
2856219089Spjd	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2857307122Smav	    sizeof (sendfs));
2858297103Smav	if ((cp = strchr(sendfs, '@')) != NULL) {
2859219089Spjd		*cp = '\0';
2860297103Smav		/*
2861297103Smav		 * Find the "sendsnap", the final snapshot in a replication
2862297103Smav		 * stream.  zfs_receive_one() handles certain errors
2863297103Smav		 * differently, depending on if the contained stream is the
2864297103Smav		 * last one or not.
2865297103Smav		 */
2866297103Smav		sendsnap = (cp + 1);
2867297103Smav	}
2868185029Spjd
2869185029Spjd	/* Finally, receive each contained stream */
2870185029Spjd	do {
2871185029Spjd		/*
2872185029Spjd		 * we should figure out if it has a recoverable
2873185029Spjd		 * error, in which case do a recv_skip() and drive on.
2874185029Spjd		 * Note, if we fail due to already having this guid,
2875185029Spjd		 * zfs_receive_one() will take care of it (ie,
2876185029Spjd		 * recv_skip() and return 0).
2877185029Spjd		 */
2878288571Smav		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
2879219089Spjd		    sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2880297103Smav		    action_handlep, sendsnap);
2881185029Spjd		if (error == ENODATA) {
2882185029Spjd			error = 0;
2883185029Spjd			break;
2884185029Spjd		}
2885185029Spjd		anyerr |= error;
2886185029Spjd	} while (error == 0);
2887185029Spjd
2888185029Spjd	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2889185029Spjd		/*
2890185029Spjd		 * Now that we have the fs's they sent us, try the
2891185029Spjd		 * renames again.
2892185029Spjd		 */
2893185029Spjd		softerr = recv_incremental_replication(hdl, tofs, flags,
2894219089Spjd		    stream_nv, stream_avl, NULL);
2895185029Spjd	}
2896185029Spjd
2897185029Spjdout:
2898185029Spjd	fsavl_destroy(stream_avl);
2899297115Smav	nvlist_free(stream_nv);
2900185029Spjd	if (softerr)
2901185029Spjd		error = -2;
2902185029Spjd	if (anyerr)
2903185029Spjd		error = -1;
2904185029Spjd	return (error);
2905185029Spjd}
2906185029Spjd
2907219089Spjdstatic void
2908219089Spjdtrunc_prop_errs(int truncated)
2909219089Spjd{
2910219089Spjd	ASSERT(truncated != 0);
2911219089Spjd
2912219089Spjd	if (truncated == 1)
2913219089Spjd		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2914219089Spjd		    "1 more property could not be set\n"));
2915219089Spjd	else
2916219089Spjd		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2917219089Spjd		    "%d more properties could not be set\n"), truncated);
2918219089Spjd}
2919219089Spjd
2920185029Spjdstatic int
2921185029Spjdrecv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2922185029Spjd{
2923185029Spjd	dmu_replay_record_t *drr;
2924276081Sdelphij	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
2925219089Spjd	char errbuf[1024];
2926185029Spjd
2927219089Spjd	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2928219089Spjd	    "cannot receive:"));
2929219089Spjd
2930185029Spjd	/* XXX would be great to use lseek if possible... */
2931185029Spjd	drr = buf;
2932185029Spjd
2933185029Spjd	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2934185029Spjd	    byteswap, NULL) == 0) {
2935185029Spjd		if (byteswap)
2936185029Spjd			drr->drr_type = BSWAP_32(drr->drr_type);
2937185029Spjd
2938185029Spjd		switch (drr->drr_type) {
2939185029Spjd		case DRR_BEGIN:
2940219089Spjd			if (drr->drr_payloadlen != 0) {
2941290756Smav				(void) recv_read(hdl, fd, buf,
2942290756Smav				    drr->drr_payloadlen, B_FALSE, NULL);
2943219089Spjd			}
2944185029Spjd			break;
2945185029Spjd
2946185029Spjd		case DRR_END:
2947185029Spjd			free(buf);
2948185029Spjd			return (0);
2949185029Spjd
2950185029Spjd		case DRR_OBJECT:
2951185029Spjd			if (byteswap) {
2952185029Spjd				drr->drr_u.drr_object.drr_bonuslen =
2953185029Spjd				    BSWAP_32(drr->drr_u.drr_object.
2954185029Spjd				    drr_bonuslen);
2955185029Spjd			}
2956185029Spjd			(void) recv_read(hdl, fd, buf,
2957185029Spjd			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2958185029Spjd			    B_FALSE, NULL);
2959185029Spjd			break;
2960185029Spjd
2961185029Spjd		case DRR_WRITE:
2962185029Spjd			if (byteswap) {
2963185029Spjd				drr->drr_u.drr_write.drr_length =
2964185029Spjd				    BSWAP_64(drr->drr_u.drr_write.drr_length);
2965185029Spjd			}
2966185029Spjd			(void) recv_read(hdl, fd, buf,
2967185029Spjd			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2968185029Spjd			break;
2969219089Spjd		case DRR_SPILL:
2970219089Spjd			if (byteswap) {
2971307118Smav				drr->drr_u.drr_spill.drr_length =
2972219089Spjd				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
2973219089Spjd			}
2974219089Spjd			(void) recv_read(hdl, fd, buf,
2975219089Spjd			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2976219089Spjd			break;
2977268649Sdelphij		case DRR_WRITE_EMBEDDED:
2978268649Sdelphij			if (byteswap) {
2979268649Sdelphij				drr->drr_u.drr_write_embedded.drr_psize =
2980268649Sdelphij				    BSWAP_32(drr->drr_u.drr_write_embedded.
2981268649Sdelphij				    drr_psize);
2982268649Sdelphij			}
2983268649Sdelphij			(void) recv_read(hdl, fd, buf,
2984268649Sdelphij			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
2985268649Sdelphij			    8), B_FALSE, NULL);
2986268649Sdelphij			break;
2987219089Spjd		case DRR_WRITE_BYREF:
2988185029Spjd		case DRR_FREEOBJECTS:
2989185029Spjd		case DRR_FREE:
2990185029Spjd			break;
2991185029Spjd
2992185029Spjd		default:
2993219089Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2994219089Spjd			    "invalid record type"));
2995219089Spjd			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2996185029Spjd		}
2997185029Spjd	}
2998185029Spjd
2999185029Spjd	free(buf);
3000185029Spjd	return (-1);
3001185029Spjd}
3002185029Spjd
3003290756Smavstatic void
3004290756Smavrecv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
3005290756Smav    boolean_t resumable)
3006290756Smav{
3007307122Smav	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
3008290756Smav
3009290756Smav	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3010290756Smav	    "checksum mismatch or incomplete stream"));
3011290756Smav
3012290756Smav	if (!resumable)
3013290756Smav		return;
3014290756Smav	(void) strlcpy(target_fs, target_snap, sizeof (target_fs));
3015290756Smav	*strchr(target_fs, '@') = '\0';
3016290756Smav	zfs_handle_t *zhp = zfs_open(hdl, target_fs,
3017290756Smav	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3018290756Smav	if (zhp == NULL)
3019290756Smav		return;
3020290756Smav
3021290756Smav	char token_buf[ZFS_MAXPROPLEN];
3022290756Smav	int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
3023290756Smav	    token_buf, sizeof (token_buf),
3024290756Smav	    NULL, NULL, 0, B_TRUE);
3025290756Smav	if (error == 0) {
3026290756Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3027290756Smav		    "checksum mismatch or incomplete stream.\n"
3028290756Smav		    "Partially received snapshot is saved.\n"
3029290756Smav		    "A resuming stream can be generated on the sending "
3030290756Smav		    "system by running:\n"
3031290756Smav		    "    zfs send -t %s"),
3032290756Smav		    token_buf);
3033290756Smav	}
3034290756Smav	zfs_close(zhp);
3035290756Smav}
3036290756Smav
3037185029Spjd/*
3038185029Spjd * Restores a backup of tosnap from the file descriptor specified by infd.
3039185029Spjd */
3040185029Spjdstatic int
3041185029Spjdzfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
3042288571Smav    const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
3043288571Smav    dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
3044288571Smav    avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3045297103Smav    uint64_t *action_handlep, const char *finalsnap)
3046185029Spjd{
3047185029Spjd	zfs_cmd_t zc = { 0 };
3048185029Spjd	time_t begin_time;
3049219089Spjd	int ioctl_err, ioctl_errno, err;
3050185029Spjd	char *cp;
3051185029Spjd	struct drr_begin *drrb = &drr->drr_u.drr_begin;
3052185029Spjd	char errbuf[1024];
3053219089Spjd	char prop_errbuf[1024];
3054219089Spjd	const char *chopprefix;
3055185029Spjd	boolean_t newfs = B_FALSE;
3056185029Spjd	boolean_t stream_wantsnewfs;
3057185029Spjd	uint64_t parent_snapguid = 0;
3058185029Spjd	prop_changelist_t *clp = NULL;
3059185029Spjd	nvlist_t *snapprops_nvlist = NULL;
3060219089Spjd	zprop_errflags_t prop_errflags;
3061219089Spjd	boolean_t recursive;
3062297103Smav	char *snapname = NULL;
3063185029Spjd
3064185029Spjd	begin_time = time(NULL);
3065185029Spjd
3066185029Spjd	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3067185029Spjd	    "cannot receive"));
3068185029Spjd
3069219089Spjd	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3070219089Spjd	    ENOENT);
3071219089Spjd
3072185029Spjd	if (stream_avl != NULL) {
3073185029Spjd		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
3074185029Spjd		    &snapname);
3075185029Spjd		nvlist_t *props;
3076185029Spjd		int ret;
3077185029Spjd
3078185029Spjd		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
3079185029Spjd		    &parent_snapguid);
3080185029Spjd		err = nvlist_lookup_nvlist(fs, "props", &props);
3081185029Spjd		if (err)
3082185029Spjd			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
3083185029Spjd
3084228103Smm		if (flags->canmountoff) {
3085185029Spjd			VERIFY(0 == nvlist_add_uint64(props,
3086185029Spjd			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
3087185029Spjd		}
3088185029Spjd		ret = zcmd_write_src_nvlist(hdl, &zc, props);
3089185029Spjd		if (err)
3090185029Spjd			nvlist_free(props);
3091185029Spjd
3092185029Spjd		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
3093185029Spjd			VERIFY(0 == nvlist_lookup_nvlist(props,
3094185029Spjd			    snapname, &snapprops_nvlist));
3095185029Spjd		}
3096185029Spjd
3097185029Spjd		if (ret != 0)
3098185029Spjd			return (-1);
3099185029Spjd	}
3100185029Spjd
3101219089Spjd	cp = NULL;
3102219089Spjd
3103185029Spjd	/*
3104185029Spjd	 * Determine how much of the snapshot name stored in the stream
3105185029Spjd	 * we are going to tack on to the name they specified on the
3106185029Spjd	 * command line, and how much we are going to chop off.
3107185029Spjd	 *
3108185029Spjd	 * If they specified a snapshot, chop the entire name stored in
3109185029Spjd	 * the stream.
3110185029Spjd	 */
3111228103Smm	if (flags->istail) {
3112185029Spjd		/*
3113219089Spjd		 * A filesystem was specified with -e. We want to tack on only
3114219089Spjd		 * the tail of the sent snapshot path.
3115185029Spjd		 */
3116185029Spjd		if (strchr(tosnap, '@')) {
3117185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3118219089Spjd			    "argument - snapshot not allowed with -e"));
3119219089Spjd			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3120219089Spjd		}
3121219089Spjd
3122219089Spjd		chopprefix = strrchr(sendfs, '/');
3123219089Spjd
3124219089Spjd		if (chopprefix == NULL) {
3125219089Spjd			/*
3126219089Spjd			 * The tail is the poolname, so we need to
3127219089Spjd			 * prepend a path separator.
3128219089Spjd			 */
3129219089Spjd			int len = strlen(drrb->drr_toname);
3130219089Spjd			cp = malloc(len + 2);
3131219089Spjd			cp[0] = '/';
3132219089Spjd			(void) strcpy(&cp[1], drrb->drr_toname);
3133219089Spjd			chopprefix = cp;
3134219089Spjd		} else {
3135219089Spjd			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
3136219089Spjd		}
3137228103Smm	} else if (flags->isprefix) {
3138219089Spjd		/*
3139219089Spjd		 * A filesystem was specified with -d. We want to tack on
3140219089Spjd		 * everything but the first element of the sent snapshot path
3141219089Spjd		 * (all but the pool name).
3142219089Spjd		 */
3143219089Spjd		if (strchr(tosnap, '@')) {
3144219089Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3145185029Spjd			    "argument - snapshot not allowed with -d"));
3146185029Spjd			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3147185029Spjd		}
3148219089Spjd
3149219089Spjd		chopprefix = strchr(drrb->drr_toname, '/');
3150219089Spjd		if (chopprefix == NULL)
3151219089Spjd			chopprefix = strchr(drrb->drr_toname, '@');
3152185029Spjd	} else if (strchr(tosnap, '@') == NULL) {
3153185029Spjd		/*
3154219089Spjd		 * If a filesystem was specified without -d or -e, we want to
3155219089Spjd		 * tack on everything after the fs specified by 'zfs send'.
3156185029Spjd		 */
3157219089Spjd		chopprefix = drrb->drr_toname + strlen(sendfs);
3158219089Spjd	} else {
3159219089Spjd		/* A snapshot was specified as an exact path (no -d or -e). */
3160219089Spjd		if (recursive) {
3161219089Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3162219089Spjd			    "cannot specify snapshot name for multi-snapshot "
3163219089Spjd			    "stream"));
3164219089Spjd			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3165219089Spjd		}
3166219089Spjd		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
3167185029Spjd	}
3168185029Spjd
3169219089Spjd	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
3170219089Spjd	ASSERT(chopprefix > drrb->drr_toname);
3171219089Spjd	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
3172219089Spjd	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
3173219089Spjd	    chopprefix[0] == '\0');
3174219089Spjd
3175185029Spjd	/*
3176185029Spjd	 * Determine name of destination snapshot, store in zc_value.
3177185029Spjd	 */
3178185029Spjd	(void) strcpy(zc.zc_value, tosnap);
3179219089Spjd	(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
3180248571Smm#ifdef __FreeBSD__
3181248571Smm	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
3182248571Smm		zfs_ioctl_version = get_zfs_ioctl_version();
3183248571Smm	/*
3184248571Smm	 * For forward compatibility hide tosnap in zc_value
3185248571Smm	 */
3186248571Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
3187248571Smm		(void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
3188248571Smm#endif
3189219089Spjd	free(cp);
3190185029Spjd	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
3191185029Spjd		zcmd_free_nvlists(&zc);
3192185029Spjd		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3193185029Spjd	}
3194185029Spjd
3195185029Spjd	/*
3196185029Spjd	 * Determine the name of the origin snapshot, store in zc_string.
3197185029Spjd	 */
3198325152Savg	if (originsnap) {
3199325152Savg		(void) strncpy(zc.zc_string, originsnap, sizeof (zc.zc_string));
3200325152Savg		if (flags->verbose)
3201325152Savg			(void) printf("using provided clone origin %s\n",
3202325152Savg			    zc.zc_string);
3203325152Savg	} else if (drrb->drr_flags & DRR_FLAG_CLONE) {
3204228103Smm		if (guid_to_name(hdl, zc.zc_value,
3205290756Smav		    drrb->drr_fromguid, B_FALSE, zc.zc_string) != 0) {
3206185029Spjd			zcmd_free_nvlists(&zc);
3207185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3208185029Spjd			    "local origin for clone %s does not exist"),
3209185029Spjd			    zc.zc_value);
3210185029Spjd			return (zfs_error(hdl, EZFS_NOENT, errbuf));
3211185029Spjd		}
3212228103Smm		if (flags->verbose)
3213185029Spjd			(void) printf("found clone origin %s\n", zc.zc_string);
3214185029Spjd	}
3215185029Spjd
3216290756Smav	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3217290756Smav	    DMU_BACKUP_FEATURE_RESUMING;
3218185029Spjd	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
3219290756Smav	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
3220185029Spjd
3221185029Spjd	if (stream_wantsnewfs) {
3222185029Spjd		/*
3223185029Spjd		 * if the parent fs does not exist, look for it based on
3224185029Spjd		 * the parent snap GUID
3225185029Spjd		 */
3226185029Spjd		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3227185029Spjd		    "cannot receive new filesystem stream"));
3228185029Spjd
3229185029Spjd		(void) strcpy(zc.zc_name, zc.zc_value);
3230185029Spjd		cp = strrchr(zc.zc_name, '/');
3231185029Spjd		if (cp)
3232185029Spjd			*cp = '\0';
3233185029Spjd		if (cp &&
3234185029Spjd		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3235307122Smav			char suffix[ZFS_MAX_DATASET_NAME_LEN];
3236185029Spjd			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
3237228103Smm			if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
3238290756Smav			    B_FALSE, zc.zc_value) == 0) {
3239185029Spjd				*strchr(zc.zc_value, '@') = '\0';
3240185029Spjd				(void) strcat(zc.zc_value, suffix);
3241185029Spjd			}
3242185029Spjd		}
3243185029Spjd	} else {
3244185029Spjd		/*
3245185029Spjd		 * if the fs does not exist, look for it based on the
3246185029Spjd		 * fromsnap GUID
3247185029Spjd		 */
3248185029Spjd		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3249185029Spjd		    "cannot receive incremental stream"));
3250185029Spjd
3251185029Spjd		(void) strcpy(zc.zc_name, zc.zc_value);
3252185029Spjd		*strchr(zc.zc_name, '@') = '\0';
3253185029Spjd
3254219089Spjd		/*
3255219089Spjd		 * If the exact receive path was specified and this is the
3256219089Spjd		 * topmost path in the stream, then if the fs does not exist we
3257219089Spjd		 * should look no further.
3258219089Spjd		 */
3259228103Smm		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
3260219089Spjd		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
3261219089Spjd		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3262307122Smav			char snap[ZFS_MAX_DATASET_NAME_LEN];
3263185029Spjd			(void) strcpy(snap, strchr(zc.zc_value, '@'));
3264228103Smm			if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
3265290756Smav			    B_FALSE, zc.zc_value) == 0) {
3266185029Spjd				*strchr(zc.zc_value, '@') = '\0';
3267185029Spjd				(void) strcat(zc.zc_value, snap);
3268185029Spjd			}
3269185029Spjd		}
3270185029Spjd	}
3271185029Spjd
3272185029Spjd	(void) strcpy(zc.zc_name, zc.zc_value);
3273185029Spjd	*strchr(zc.zc_name, '@') = '\0';
3274185029Spjd
3275185029Spjd	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3276185029Spjd		zfs_handle_t *zhp;
3277219089Spjd
3278185029Spjd		/*
3279290756Smav		 * Destination fs exists.  It must be one of these cases:
3280290756Smav		 *  - an incremental send stream
3281290756Smav		 *  - the stream specifies a new fs (full stream or clone)
3282290756Smav		 *    and they want us to blow away the existing fs (and
3283290756Smav		 *    have therefore specified -F and removed any snapshots)
3284290756Smav		 *  - we are resuming a failed receive.
3285185029Spjd		 */
3286185029Spjd		if (stream_wantsnewfs) {
3287228103Smm			if (!flags->force) {
3288185029Spjd				zcmd_free_nvlists(&zc);
3289185029Spjd				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3290185029Spjd				    "destination '%s' exists\n"
3291185029Spjd				    "must specify -F to overwrite it"),
3292185029Spjd				    zc.zc_name);
3293185029Spjd				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3294185029Spjd			}
3295185029Spjd			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
3296185029Spjd			    &zc) == 0) {
3297185029Spjd				zcmd_free_nvlists(&zc);
3298185029Spjd				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3299185029Spjd				    "destination has snapshots (eg. %s)\n"
3300185029Spjd				    "must destroy them to overwrite it"),
3301185029Spjd				    zc.zc_name);
3302185029Spjd				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3303185029Spjd			}
3304185029Spjd		}
3305185029Spjd
3306185029Spjd		if ((zhp = zfs_open(hdl, zc.zc_name,
3307185029Spjd		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
3308185029Spjd			zcmd_free_nvlists(&zc);
3309185029Spjd			return (-1);
3310185029Spjd		}
3311185029Spjd
3312185029Spjd		if (stream_wantsnewfs &&
3313185029Spjd		    zhp->zfs_dmustats.dds_origin[0]) {
3314185029Spjd			zcmd_free_nvlists(&zc);
3315185029Spjd			zfs_close(zhp);
3316185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3317185029Spjd			    "destination '%s' is a clone\n"
3318185029Spjd			    "must destroy it to overwrite it"),
3319185029Spjd			    zc.zc_name);
3320185029Spjd			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3321185029Spjd		}
3322185029Spjd
3323228103Smm		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
3324185029Spjd		    stream_wantsnewfs) {
3325185029Spjd			/* We can't do online recv in this case */
3326185029Spjd			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
3327185029Spjd			if (clp == NULL) {
3328219089Spjd				zfs_close(zhp);
3329185029Spjd				zcmd_free_nvlists(&zc);
3330185029Spjd				return (-1);
3331185029Spjd			}
3332185029Spjd			if (changelist_prefix(clp) != 0) {
3333185029Spjd				changelist_free(clp);
3334219089Spjd				zfs_close(zhp);
3335185029Spjd				zcmd_free_nvlists(&zc);
3336185029Spjd				return (-1);
3337185029Spjd			}
3338185029Spjd		}
3339290756Smav
3340290756Smav		/*
3341290756Smav		 * If we are resuming a newfs, set newfs here so that we will
3342290756Smav		 * mount it if the recv succeeds this time.  We can tell
3343290756Smav		 * that it was a newfs on the first recv because the fs
3344290756Smav		 * itself will be inconsistent (if the fs existed when we
3345290756Smav		 * did the first recv, we would have received it into
3346290756Smav		 * .../%recv).
3347290756Smav		 */
3348290756Smav		if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
3349290756Smav			newfs = B_TRUE;
3350290756Smav
3351185029Spjd		zfs_close(zhp);
3352185029Spjd	} else {
3353185029Spjd		/*
3354185029Spjd		 * Destination filesystem does not exist.  Therefore we better
3355185029Spjd		 * be creating a new filesystem (either from a full backup, or
3356185029Spjd		 * a clone).  It would therefore be invalid if the user
3357185029Spjd		 * specified only the pool name (i.e. if the destination name
3358185029Spjd		 * contained no slash character).
3359185029Spjd		 */
3360185029Spjd		if (!stream_wantsnewfs ||
3361185029Spjd		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
3362185029Spjd			zcmd_free_nvlists(&zc);
3363185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3364185029Spjd			    "destination '%s' does not exist"), zc.zc_name);
3365185029Spjd			return (zfs_error(hdl, EZFS_NOENT, errbuf));
3366185029Spjd		}
3367185029Spjd
3368185029Spjd		/*
3369185029Spjd		 * Trim off the final dataset component so we perform the
3370185029Spjd		 * recvbackup ioctl to the filesystem's parent.
3371185029Spjd		 */
3372185029Spjd		*cp = '\0';
3373185029Spjd
3374228103Smm		if (flags->isprefix && !flags->istail && !flags->dryrun &&
3375185029Spjd		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
3376185029Spjd			zcmd_free_nvlists(&zc);
3377185029Spjd			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
3378185029Spjd		}
3379185029Spjd
3380185029Spjd		newfs = B_TRUE;
3381185029Spjd	}
3382185029Spjd
3383290756Smav	zc.zc_begin_record = *drr_noswap;
3384185029Spjd	zc.zc_cookie = infd;
3385228103Smm	zc.zc_guid = flags->force;
3386290756Smav	zc.zc_resumable = flags->resumable;
3387228103Smm	if (flags->verbose) {
3388185029Spjd		(void) printf("%s %s stream of %s into %s\n",
3389228103Smm		    flags->dryrun ? "would receive" : "receiving",
3390185029Spjd		    drrb->drr_fromguid ? "incremental" : "full",
3391185029Spjd		    drrb->drr_toname, zc.zc_value);
3392185029Spjd		(void) fflush(stdout);
3393185029Spjd	}
3394185029Spjd
3395228103Smm	if (flags->dryrun) {
3396185029Spjd		zcmd_free_nvlists(&zc);
3397228103Smm		return (recv_skip(hdl, infd, flags->byteswap));
3398185029Spjd	}
3399185029Spjd
3400219089Spjd	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
3401219089Spjd	zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
3402219089Spjd	zc.zc_cleanup_fd = cleanup_fd;
3403219089Spjd	zc.zc_action_handle = *action_handlep;
3404219089Spjd
3405185029Spjd	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
3406185029Spjd	ioctl_errno = errno;
3407219089Spjd	prop_errflags = (zprop_errflags_t)zc.zc_obj;
3408219089Spjd
3409219089Spjd	if (err == 0) {
3410219089Spjd		nvlist_t *prop_errors;
3411219089Spjd		VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
3412219089Spjd		    zc.zc_nvlist_dst_size, &prop_errors, 0));
3413219089Spjd
3414219089Spjd		nvpair_t *prop_err = NULL;
3415219089Spjd
3416219089Spjd		while ((prop_err = nvlist_next_nvpair(prop_errors,
3417219089Spjd		    prop_err)) != NULL) {
3418219089Spjd			char tbuf[1024];
3419219089Spjd			zfs_prop_t prop;
3420219089Spjd			int intval;
3421219089Spjd
3422219089Spjd			prop = zfs_name_to_prop(nvpair_name(prop_err));
3423219089Spjd			(void) nvpair_value_int32(prop_err, &intval);
3424219089Spjd			if (strcmp(nvpair_name(prop_err),
3425219089Spjd			    ZPROP_N_MORE_ERRORS) == 0) {
3426219089Spjd				trunc_prop_errs(intval);
3427219089Spjd				break;
3428297103Smav			} else if (snapname == NULL || finalsnap == NULL ||
3429297103Smav			    strcmp(finalsnap, snapname) == 0 ||
3430297103Smav			    strcmp(nvpair_name(prop_err),
3431297103Smav			    zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
3432297103Smav				/*
3433297103Smav				 * Skip the special case of, for example,
3434297103Smav				 * "refquota", errors on intermediate
3435297103Smav				 * snapshots leading up to a final one.
3436297103Smav				 * That's why we have all of the checks above.
3437297103Smav				 *
3438297103Smav				 * See zfs_ioctl.c's extract_delay_props() for
3439297103Smav				 * a list of props which can fail on
3440297103Smav				 * intermediate snapshots, but shouldn't
3441297103Smav				 * affect the overall receive.
3442297103Smav				 */
3443219089Spjd				(void) snprintf(tbuf, sizeof (tbuf),
3444219089Spjd				    dgettext(TEXT_DOMAIN,
3445219089Spjd				    "cannot receive %s property on %s"),
3446219089Spjd				    nvpair_name(prop_err), zc.zc_name);
3447219089Spjd				zfs_setprop_error(hdl, prop, intval, tbuf);
3448219089Spjd			}
3449219089Spjd		}
3450219089Spjd		nvlist_free(prop_errors);
3451219089Spjd	}
3452219089Spjd
3453219089Spjd	zc.zc_nvlist_dst = 0;
3454219089Spjd	zc.zc_nvlist_dst_size = 0;
3455185029Spjd	zcmd_free_nvlists(&zc);
3456185029Spjd
3457185029Spjd	if (err == 0 && snapprops_nvlist) {
3458185029Spjd		zfs_cmd_t zc2 = { 0 };
3459185029Spjd
3460185029Spjd		(void) strcpy(zc2.zc_name, zc.zc_value);
3461219089Spjd		zc2.zc_cookie = B_TRUE; /* received */
3462185029Spjd		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
3463185029Spjd			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
3464185029Spjd			zcmd_free_nvlists(&zc2);
3465185029Spjd		}
3466185029Spjd	}
3467185029Spjd
3468219089Spjd	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3469185029Spjd		/*
3470185029Spjd		 * It may be that this snapshot already exists,
3471185029Spjd		 * in which case we want to consume & ignore it
3472185029Spjd		 * rather than failing.
3473185029Spjd		 */
3474185029Spjd		avl_tree_t *local_avl;
3475185029Spjd		nvlist_t *local_nv, *fs;
3476219089Spjd		cp = strchr(zc.zc_value, '@');
3477185029Spjd
3478185029Spjd		/*
3479185029Spjd		 * XXX Do this faster by just iterating over snaps in
3480185029Spjd		 * this fs.  Also if zc_value does not exist, we will
3481185029Spjd		 * get a strange "does not exist" error message.
3482185029Spjd		 */
3483185029Spjd		*cp = '\0';
3484219089Spjd		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
3485307117Smav		    B_FALSE, &local_nv, &local_avl) == 0) {
3486185029Spjd			*cp = '@';
3487185029Spjd			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3488185029Spjd			fsavl_destroy(local_avl);
3489185029Spjd			nvlist_free(local_nv);
3490185029Spjd
3491185029Spjd			if (fs != NULL) {
3492228103Smm				if (flags->verbose) {
3493185029Spjd					(void) printf("snap %s already exists; "
3494185029Spjd					    "ignoring\n", zc.zc_value);
3495185029Spjd				}
3496219089Spjd				err = ioctl_err = recv_skip(hdl, infd,
3497228103Smm				    flags->byteswap);
3498185029Spjd			}
3499185029Spjd		}
3500185029Spjd		*cp = '@';
3501185029Spjd	}
3502185029Spjd
3503185029Spjd	if (ioctl_err != 0) {
3504185029Spjd		switch (ioctl_errno) {
3505185029Spjd		case ENODEV:
3506185029Spjd			cp = strchr(zc.zc_value, '@');
3507185029Spjd			*cp = '\0';
3508185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3509185029Spjd			    "most recent snapshot of %s does not\n"
3510185029Spjd			    "match incremental source"), zc.zc_value);
3511185029Spjd			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3512185029Spjd			*cp = '@';
3513185029Spjd			break;
3514185029Spjd		case ETXTBSY:
3515185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3516185029Spjd			    "destination %s has been modified\n"
3517185029Spjd			    "since most recent snapshot"), zc.zc_name);
3518185029Spjd			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3519185029Spjd			break;
3520185029Spjd		case EEXIST:
3521185029Spjd			cp = strchr(zc.zc_value, '@');
3522185029Spjd			if (newfs) {
3523185029Spjd				/* it's the containing fs that exists */
3524185029Spjd				*cp = '\0';
3525185029Spjd			}
3526185029Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3527185029Spjd			    "destination already exists"));
3528185029Spjd			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
3529185029Spjd			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3530185029Spjd			    zc.zc_value);
3531185029Spjd			*cp = '@';
3532185029Spjd			break;
3533185029Spjd		case EINVAL:
3534185029Spjd			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3535185029Spjd			break;
3536185029Spjd		case ECKSUM:
3537290756Smav			recv_ecksum_set_aux(hdl, zc.zc_value, flags->resumable);
3538185029Spjd			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3539185029Spjd			break;
3540219089Spjd		case ENOTSUP:
3541219089Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3542219089Spjd			    "pool must be upgraded to receive this stream."));
3543219089Spjd			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3544219089Spjd			break;
3545219089Spjd		case EDQUOT:
3546219089Spjd			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3547219089Spjd			    "destination %s space quota exceeded"), zc.zc_name);
3548228103Smm			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3549219089Spjd			break;
3550185029Spjd		default:
3551185029Spjd			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3552185029Spjd		}
3553185029Spjd	}
3554185029Spjd
3555185029Spjd	/*
3556219089Spjd	 * Mount the target filesystem (if created).  Also mount any
3557219089Spjd	 * children of the target filesystem if we did a replication
3558219089Spjd	 * receive (indicated by stream_avl being non-NULL).
3559185029Spjd	 */
3560185029Spjd	cp = strchr(zc.zc_value, '@');
3561185029Spjd	if (cp && (ioctl_err == 0 || !newfs)) {
3562185029Spjd		zfs_handle_t *h;
3563185029Spjd
3564185029Spjd		*cp = '\0';
3565185029Spjd		h = zfs_open(hdl, zc.zc_value,
3566185029Spjd		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3567185029Spjd		if (h != NULL) {
3568185029Spjd			if (h->zfs_type == ZFS_TYPE_VOLUME) {
3569185029Spjd				*cp = '@';
3570219089Spjd			} else if (newfs || stream_avl) {
3571185029Spjd				/*
3572185029Spjd				 * Track the first/top of hierarchy fs,
3573185029Spjd				 * for mounting and sharing later.
3574185029Spjd				 */
3575185029Spjd				if (top_zfs && *top_zfs == NULL)
3576185029Spjd					*top_zfs = zfs_strdup(hdl, zc.zc_value);
3577185029Spjd			}
3578185029Spjd			zfs_close(h);
3579185029Spjd		}
3580185029Spjd		*cp = '@';
3581185029Spjd	}
3582185029Spjd
3583185029Spjd	if (clp) {
3584298264Savg		if (!flags->nomount)
3585298264Savg			err |= changelist_postfix(clp);
3586185029Spjd		changelist_free(clp);
3587185029Spjd	}
3588185029Spjd
3589219089Spjd	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3590219089Spjd		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3591219089Spjd		    "failed to clear unreceived properties on %s"),
3592219089Spjd		    zc.zc_name);
3593219089Spjd		(void) fprintf(stderr, "\n");
3594219089Spjd	}
3595219089Spjd	if (prop_errflags & ZPROP_ERR_NORESTORE) {
3596219089Spjd		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3597219089Spjd		    "failed to restore original properties on %s"),
3598219089Spjd		    zc.zc_name);
3599219089Spjd		(void) fprintf(stderr, "\n");
3600219089Spjd	}
3601219089Spjd
3602185029Spjd	if (err || ioctl_err)
3603185029Spjd		return (-1);
3604185029Spjd
3605219089Spjd	*action_handlep = zc.zc_action_handle;
3606219089Spjd
3607228103Smm	if (flags->verbose) {
3608185029Spjd		char buf1[64];
3609185029Spjd		char buf2[64];
3610185029Spjd		uint64_t bytes = zc.zc_cookie;
3611185029Spjd		time_t delta = time(NULL) - begin_time;
3612185029Spjd		if (delta == 0)
3613185029Spjd			delta = 1;
3614185029Spjd		zfs_nicenum(bytes, buf1, sizeof (buf1));
3615185029Spjd		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3616185029Spjd
3617185029Spjd		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3618185029Spjd		    buf1, delta, buf2);
3619185029Spjd	}
3620185029Spjd
3621185029Spjd	return (0);
3622185029Spjd}
3623185029Spjd
3624185029Spjdstatic int
3625288571Smavzfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
3626288571Smav    const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
3627288571Smav    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3628297103Smav    uint64_t *action_handlep, const char *finalsnap)
3629185029Spjd{
3630185029Spjd	int err;
3631185029Spjd	dmu_replay_record_t drr, drr_noswap;
3632185029Spjd	struct drr_begin *drrb = &drr.drr_u.drr_begin;
3633185029Spjd	char errbuf[1024];
3634185029Spjd	zio_cksum_t zcksum = { 0 };
3635219089Spjd	uint64_t featureflags;
3636219089Spjd	int hdrtype;
3637185029Spjd
3638185029Spjd	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3639185029Spjd	    "cannot receive"));
3640185029Spjd
3641228103Smm	if (flags->isprefix &&
3642185029Spjd	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3643185029Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3644185029Spjd		    "(%s) does not exist"), tosnap);
3645185029Spjd		return (zfs_error(hdl, EZFS_NOENT, errbuf));
3646185029Spjd	}
3647288571Smav	if (originsnap &&
3648288571Smav	    !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
3649288571Smav		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
3650288571Smav		    "(%s) does not exist"), originsnap);
3651288571Smav		return (zfs_error(hdl, EZFS_NOENT, errbuf));
3652288571Smav	}
3653185029Spjd
3654185029Spjd	/* read in the BEGIN record */
3655185029Spjd	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3656185029Spjd	    &zcksum)))
3657185029Spjd		return (err);
3658185029Spjd
3659185029Spjd	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3660185029Spjd		/* It's the double end record at the end of a package */
3661185029Spjd		return (ENODATA);
3662185029Spjd	}
3663185029Spjd
3664185029Spjd	/* the kernel needs the non-byteswapped begin record */
3665185029Spjd	drr_noswap = drr;
3666185029Spjd
3667228103Smm	flags->byteswap = B_FALSE;
3668185029Spjd	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3669185029Spjd		/*
3670185029Spjd		 * We computed the checksum in the wrong byteorder in
3671185029Spjd		 * recv_read() above; do it again correctly.
3672185029Spjd		 */
3673185029Spjd		bzero(&zcksum, sizeof (zio_cksum_t));
3674185029Spjd		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3675228103Smm		flags->byteswap = B_TRUE;
3676185029Spjd
3677185029Spjd		drr.drr_type = BSWAP_32(drr.drr_type);
3678185029Spjd		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3679185029Spjd		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3680219089Spjd		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3681185029Spjd		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3682185029Spjd		drrb->drr_type = BSWAP_32(drrb->drr_type);
3683185029Spjd		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3684185029Spjd		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3685185029Spjd		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3686185029Spjd	}
3687185029Spjd
3688185029Spjd	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3689185029Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3690185029Spjd		    "stream (bad magic number)"));
3691185029Spjd		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3692185029Spjd	}
3693185029Spjd
3694219089Spjd	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3695219089Spjd	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3696219089Spjd
3697219089Spjd	if (!DMU_STREAM_SUPPORTED(featureflags) ||
3698219089Spjd	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3699219089Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3700219089Spjd		    "stream has unsupported feature, feature flags = %lx"),
3701219089Spjd		    featureflags);
3702219089Spjd		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3703219089Spjd	}
3704219089Spjd
3705185029Spjd	if (strchr(drrb->drr_toname, '@') == NULL) {
3706185029Spjd		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3707185029Spjd		    "stream (bad snapshot name)"));
3708185029Spjd		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3709185029Spjd	}
3710185029Spjd
3711219089Spjd	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3712307122Smav		char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
3713219089Spjd		if (sendfs == NULL) {
3714219089Spjd			/*
3715219089Spjd			 * We were not called from zfs_receive_package(). Get
3716219089Spjd			 * the fs specified by 'zfs send'.
3717219089Spjd			 */
3718219089Spjd			char *cp;
3719219089Spjd			(void) strlcpy(nonpackage_sendfs,
3720307122Smav			    drr.drr_u.drr_begin.drr_toname,
3721307122Smav			    sizeof (nonpackage_sendfs));
3722219089Spjd			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3723219089Spjd				*cp = '\0';
3724219089Spjd			sendfs = nonpackage_sendfs;
3725297103Smav			VERIFY(finalsnap == NULL);
3726219089Spjd		}
3727288571Smav		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
3728288571Smav		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
3729297103Smav		    cleanup_fd, action_handlep, finalsnap));
3730219089Spjd	} else {
3731219089Spjd		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3732219089Spjd		    DMU_COMPOUNDSTREAM);
3733288571Smav		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
3734288571Smav		    &zcksum, top_zfs, cleanup_fd, action_handlep));
3735185029Spjd	}
3736185029Spjd}
3737185029Spjd
3738185029Spjd/*
3739185029Spjd * Restores a backup of tosnap from the file descriptor specified by infd.
3740185029Spjd * Return 0 on total success, -2 if some things couldn't be
3741185029Spjd * destroyed/renamed/promoted, -1 if some things couldn't be received.
3742290756Smav * (-1 will override -2, if -1 and the resumable flag was specified the
3743290756Smav * transfer can be resumed if the sending side supports it).
3744185029Spjd */
3745185029Spjdint
3746288571Smavzfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
3747288571Smav    recvflags_t *flags, int infd, avl_tree_t *stream_avl)
3748185029Spjd{
3749185029Spjd	char *top_zfs = NULL;
3750185029Spjd	int err;
3751219089Spjd	int cleanup_fd;
3752219089Spjd	uint64_t action_handle = 0;
3753288571Smav	char *originsnap = NULL;
3754288571Smav	if (props) {
3755288571Smav		err = nvlist_lookup_string(props, "origin", &originsnap);
3756288571Smav		if (err && err != ENOENT)
3757288571Smav			return (err);
3758288571Smav	}
3759185029Spjd
3760219089Spjd	cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
3761219089Spjd	VERIFY(cleanup_fd >= 0);
3762185029Spjd
3763288571Smav	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
3764297103Smav	    stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL);
3765219089Spjd
3766219089Spjd	VERIFY(0 == close(cleanup_fd));
3767219089Spjd
3768228103Smm	if (err == 0 && !flags->nomount && top_zfs) {
3769185029Spjd		zfs_handle_t *zhp;
3770185029Spjd		prop_changelist_t *clp;
3771185029Spjd
3772185029Spjd		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3773185029Spjd		if (zhp != NULL) {
3774185029Spjd			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3775185029Spjd			    CL_GATHER_MOUNT_ALWAYS, 0);
3776185029Spjd			zfs_close(zhp);
3777185029Spjd			if (clp != NULL) {
3778185029Spjd				/* mount and share received datasets */
3779185029Spjd				err = changelist_postfix(clp);
3780185029Spjd				changelist_free(clp);
3781185029Spjd			}
3782185029Spjd		}
3783185029Spjd		if (zhp == NULL || clp == NULL || err)
3784185029Spjd			err = -1;
3785185029Spjd	}
3786185029Spjd	if (top_zfs)
3787185029Spjd		free(top_zfs);
3788185029Spjd
3789185029Spjd	return (err);
3790185029Spjd}
3791