1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23262320Sdelphij * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org>
24268659Sdelphij * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
25265744Sdelphij * Copyright (c) 2014, Joyent, Inc. All rights reserved.
26262320Sdelphij * Copyright (c) 2014 RackTop Systems.
27168404Spjd */
28168404Spjd
29168404Spjd#include <sys/dmu_objset.h>
30168404Spjd#include <sys/dsl_dataset.h>
31168404Spjd#include <sys/dsl_dir.h>
32168404Spjd#include <sys/dsl_prop.h>
33168404Spjd#include <sys/dsl_synctask.h>
34168404Spjd#include <sys/dmu_traverse.h>
35235222Smm#include <sys/dmu_impl.h>
36168404Spjd#include <sys/dmu_tx.h>
37168404Spjd#include <sys/arc.h>
38168404Spjd#include <sys/zio.h>
39168404Spjd#include <sys/zap.h>
40236884Smm#include <sys/zfeature.h>
41168404Spjd#include <sys/unique.h>
42168404Spjd#include <sys/zfs_context.h>
43168676Spjd#include <sys/zfs_ioctl.h>
44185029Spjd#include <sys/spa.h>
45185029Spjd#include <sys/zfs_znode.h>
46219089Spjd#include <sys/zfs_onexit.h>
47219089Spjd#include <sys/zvol.h>
48219089Spjd#include <sys/dsl_scan.h>
49219089Spjd#include <sys/dsl_deadlist.h>
50248571Smm#include <sys/dsl_destroy.h>
51248571Smm#include <sys/dsl_userhold.h>
52263407Sdelphij#include <sys/dsl_bookmark.h>
53168404Spjd
/*
 * Exchange the values of two uint64_t lvalues.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and remains correct when used, followed by a
 * semicolon, as the unbraced body of an if/else.  Both arguments are
 * evaluated twice, so neither may have side effects.
 */
#define	SWITCH64(x, y) \
	do { \
		uint64_t switch64_tmp = (x); \
		(x) = (y); \
		(y) = switch64_tmp; \
	} while (0)

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably an upper bound on dataset reference counts -- confirm.
 */
#define	DS_REF_MAX	(1ULL << 62)

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably the block size used for deadlist objects -- confirm.
 */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
64168404Spjd
65168404Spjd/*
66185029Spjd * Figure out how much of this delta should be propogated to the dsl_dir
67185029Spjd * layer.  If there's a refreservation, that space has already been
68185029Spjd * partially accounted for in our ancestors.
69168404Spjd */
70185029Spjdstatic int64_t
71185029Spjdparent_delta(dsl_dataset_t *ds, int64_t delta)
72185029Spjd{
73185029Spjd	uint64_t old_bytes, new_bytes;
74168404Spjd
75185029Spjd	if (ds->ds_reserved == 0)
76185029Spjd		return (delta);
77168404Spjd
78185029Spjd	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
79185029Spjd	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
80185029Spjd
81185029Spjd	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
82185029Spjd	return (new_bytes - old_bytes);
83185029Spjd}
84185029Spjd
85168404Spjdvoid
86219089Spjddsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
87168404Spjd{
88219089Spjd	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
89168404Spjd	int compressed = BP_GET_PSIZE(bp);
90168404Spjd	int uncompressed = BP_GET_UCSIZE(bp);
91185029Spjd	int64_t delta;
92168404Spjd
93219089Spjd	dprintf_bp(bp, "ds=%p", ds);
94168404Spjd
95168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
96168404Spjd	/* It could have been compressed away to nothing */
97168404Spjd	if (BP_IS_HOLE(bp))
98168404Spjd		return;
99168404Spjd	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
100236884Smm	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
101168404Spjd	if (ds == NULL) {
102239620Smm		dsl_pool_mos_diduse_space(tx->tx_pool,
103239620Smm		    used, compressed, uncompressed);
104168404Spjd		return;
105168404Spjd	}
106254757Sdelphij
107168404Spjd	dmu_buf_will_dirty(ds->ds_dbuf, tx);
108168404Spjd	mutex_enter(&ds->ds_lock);
109185029Spjd	delta = parent_delta(ds, used);
110236884Smm	ds->ds_phys->ds_referenced_bytes += used;
111168404Spjd	ds->ds_phys->ds_compressed_bytes += compressed;
112168404Spjd	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
113168404Spjd	ds->ds_phys->ds_unique_bytes += used;
114168404Spjd	mutex_exit(&ds->ds_lock);
115185029Spjd	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
116185029Spjd	    compressed, uncompressed, tx);
117185029Spjd	dsl_dir_transfer_space(ds->ds_dir, used - delta,
118185029Spjd	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
119168404Spjd}
120168404Spjd
/*
 * Account for block bp no longer being referenced by dataset ds.
 *
 * Returns 0 without side effects if bp is a hole; otherwise returns the
 * value of bp_get_dsize_sync() for bp.
 *
 * - ds == NULL: the block is freed and its sizes are subtracted through
 *   dsl_pool_mos_diduse_space().
 * - bp was born after ds's previous snapshot txg: the block is freed now
 *   and the dataset's unique bytes and the dsl_dir accounting are reduced.
 * - otherwise: the block goes on the dataset's dead list (deferred through
 *   ds_pending_deadlist when async is set) and space accounting is
 *   adjusted for the previous snapshot and the directory.
 *
 * In both dataset cases the referenced, compressed and uncompressed byte
 * counters of ds are decremented under ds_lock before returning.
 */
121185029Spjdint
122219089Spjddsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
123219089Spjd    boolean_t async)
124168404Spjd{
125263397Sdelphij	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
126263397Sdelphij	int compressed = BP_GET_PSIZE(bp);
127263397Sdelphij	int uncompressed = BP_GET_UCSIZE(bp);
128263397Sdelphij
129219089Spjd	if (BP_IS_HOLE(bp))
130219089Spjd		return (0);
131219089Spjd
132219089Spjd	ASSERT(dmu_tx_is_syncing(tx));
133219089Spjd	ASSERT(bp->blk_birth <= tx->tx_txg);
134219089Spjd
	/* No dataset: free the block and undo the MOS space accounting. */
135168404Spjd	if (ds == NULL) {
136219089Spjd		dsl_free(tx->tx_pool, tx->tx_txg, bp);
137239620Smm		dsl_pool_mos_diduse_space(tx->tx_pool,
138239620Smm		    -used, -compressed, -uncompressed);
139185029Spjd		return (used);
140168404Spjd	}
141168404Spjd	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
142168404Spjd
143185029Spjd	ASSERT(!dsl_dataset_is_snapshot(ds));
144168404Spjd	dmu_buf_will_dirty(ds->ds_dbuf, tx);
145168404Spjd
	/* Born after the previous snapshot: free the block immediately. */
146168404Spjd	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
147185029Spjd		int64_t delta;
148168404Spjd
149219089Spjd		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
150219089Spjd		dsl_free(tx->tx_pool, tx->tx_txg, bp);
151168404Spjd
152168404Spjd		mutex_enter(&ds->ds_lock);
153185029Spjd		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
154185029Spjd		    !DS_UNIQUE_IS_ACCURATE(ds));
		/* delta is computed before ds_unique_bytes is changed. */
155185029Spjd		delta = parent_delta(ds, -used);
156168404Spjd		ds->ds_phys->ds_unique_bytes -= used;
157168404Spjd		mutex_exit(&ds->ds_lock);
158185029Spjd		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
159185029Spjd		    delta, -compressed, -uncompressed, tx);
160185029Spjd		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
161185029Spjd		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
162168404Spjd	} else {
163168404Spjd		dprintf_bp(bp, "putting on dead list: %s", "");
164219089Spjd		if (async) {
165219089Spjd			/*
166219089Spjd			 * We are here as part of zio's write done callback,
167219089Spjd			 * which means we're a zio interrupt thread.  We can't
168219089Spjd			 * call dsl_deadlist_insert() now because it may block
169219089Spjd			 * waiting for I/O.  Instead, put bp on the deferred
170219089Spjd			 * queue and let dsl_pool_sync() finish the job.
171219089Spjd			 */
172219089Spjd			bplist_append(&ds->ds_pending_deadlist, bp);
173219089Spjd		} else {
174219089Spjd			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
175219089Spjd		}
176185029Spjd		ASSERT3U(ds->ds_prev->ds_object, ==,
177185029Spjd		    ds->ds_phys->ds_prev_snap_obj);
178185029Spjd		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
179168404Spjd		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
180185029Spjd		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
181185029Spjd		    ds->ds_object && bp->blk_birth >
182185029Spjd		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
183185029Spjd			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
184185029Spjd			mutex_enter(&ds->ds_prev->ds_lock);
185185029Spjd			ds->ds_prev->ds_phys->ds_unique_bytes += used;
186185029Spjd			mutex_exit(&ds->ds_prev->ds_lock);
187168404Spjd		}
		/* Only blocks born after the directory's origin txg are moved. */
188219089Spjd		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
189185029Spjd			dsl_dir_transfer_space(ds->ds_dir, used,
190185029Spjd			    DD_USED_HEAD, DD_USED_SNAP, tx);
191185029Spjd		}
192168404Spjd	}
	/* Common tail: the head dataset no longer references this block. */
193168404Spjd	mutex_enter(&ds->ds_lock);
194236884Smm	ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
195236884Smm	ds->ds_phys->ds_referenced_bytes -= used;
196168404Spjd	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
197168404Spjd	ds->ds_phys->ds_compressed_bytes -= compressed;
198168404Spjd	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
199168404Spjd	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
200168404Spjd	mutex_exit(&ds->ds_lock);
201185029Spjd
202185029Spjd	return (used);
203168404Spjd}
204168404Spjd
205168404Spjduint64_t
206168404Spjddsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
207168404Spjd{
208168404Spjd	uint64_t trysnap = 0;
209168404Spjd
210168404Spjd	if (ds == NULL)
211168404Spjd		return (0);
212168404Spjd	/*
213168404Spjd	 * The snapshot creation could fail, but that would cause an
214168404Spjd	 * incorrect FALSE return, which would only result in an
215168404Spjd	 * overestimation of the amount of space that an operation would
216168404Spjd	 * consume, which is OK.
217168404Spjd	 *
218168404Spjd	 * There's also a small window where we could miss a pending
219168404Spjd	 * snapshot, because we could set the sync task in the quiescing
220168404Spjd	 * phase.  So this should only be used as a guess.
221168404Spjd	 */
222168404Spjd	if (ds->ds_trysnap_txg >
223168404Spjd	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
224168404Spjd		trysnap = ds->ds_trysnap_txg;
225168404Spjd	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
226168404Spjd}
227168404Spjd
228209962Smmboolean_t
229219089Spjddsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
230219089Spjd    uint64_t blk_birth)
231168404Spjd{
232263397Sdelphij	if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
233263397Sdelphij	    (bp != NULL && BP_IS_HOLE(bp)))
234219089Spjd		return (B_FALSE);
235219089Spjd
236219089Spjd	ddt_prefetch(dsl_dataset_get_spa(ds), bp);
237219089Spjd
238219089Spjd	return (B_TRUE);
239168404Spjd}
240168404Spjd
241168404Spjd/* ARGSUSED */
242168404Spjdstatic void
243168404Spjddsl_dataset_evict(dmu_buf_t *db, void *dsv)
244168404Spjd{
245168404Spjd	dsl_dataset_t *ds = dsv;
246168404Spjd
247248571Smm	ASSERT(ds->ds_owner == NULL);
248168404Spjd
249185029Spjd	unique_remove(ds->ds_fsid_guid);
250168404Spjd
251219089Spjd	if (ds->ds_objset != NULL)
252219089Spjd		dmu_objset_evict(ds->ds_objset);
253168404Spjd
254168404Spjd	if (ds->ds_prev) {
255248571Smm		dsl_dataset_rele(ds->ds_prev, ds);
256168404Spjd		ds->ds_prev = NULL;
257168404Spjd	}
258168404Spjd
259219089Spjd	bplist_destroy(&ds->ds_pending_deadlist);
260248571Smm	if (ds->ds_phys->ds_deadlist_obj != 0)
261219089Spjd		dsl_deadlist_close(&ds->ds_deadlist);
262185029Spjd	if (ds->ds_dir)
263248571Smm		dsl_dir_rele(ds->ds_dir, ds);
264168404Spjd
265185029Spjd	ASSERT(!list_link_active(&ds->ds_synced_link));
266168404Spjd
267185029Spjd	if (mutex_owned(&ds->ds_lock))
268185029Spjd		mutex_exit(&ds->ds_lock);
269168404Spjd	mutex_destroy(&ds->ds_lock);
270185029Spjd	if (mutex_owned(&ds->ds_opening_lock))
271185029Spjd		mutex_exit(&ds->ds_opening_lock);
272185029Spjd	mutex_destroy(&ds->ds_opening_lock);
273269218Sdelphij	mutex_destroy(&ds->ds_sendstream_lock);
274248571Smm	refcount_destroy(&ds->ds_longholds);
275168404Spjd
276168404Spjd	kmem_free(ds, sizeof (dsl_dataset_t));
277168404Spjd}
278168404Spjd
279248571Smmint
280168404Spjddsl_dataset_get_snapname(dsl_dataset_t *ds)
281168404Spjd{
282168404Spjd	dsl_dataset_phys_t *headphys;
283168404Spjd	int err;
284168404Spjd	dmu_buf_t *headdbuf;
285168404Spjd	dsl_pool_t *dp = ds->ds_dir->dd_pool;
286168404Spjd	objset_t *mos = dp->dp_meta_objset;
287168404Spjd
288168404Spjd	if (ds->ds_snapname[0])
289168404Spjd		return (0);
290168404Spjd	if (ds->ds_phys->ds_next_snap_obj == 0)
291168404Spjd		return (0);
292168404Spjd
293168404Spjd	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
294168404Spjd	    FTAG, &headdbuf);
295248571Smm	if (err != 0)
296168404Spjd		return (err);
297168404Spjd	headphys = headdbuf->db_data;
298168404Spjd	err = zap_value_search(dp->dp_meta_objset,
299185029Spjd	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
300168404Spjd	dmu_buf_rele(headdbuf, FTAG);
301168404Spjd	return (err);
302168404Spjd}
303168404Spjd
304248571Smmint
305185029Spjddsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
306168404Spjd{
307185029Spjd	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
308185029Spjd	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
309185029Spjd	matchtype_t mt;
310185029Spjd	int err;
311185029Spjd
312185029Spjd	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
313185029Spjd		mt = MT_FIRST;
314185029Spjd	else
315185029Spjd		mt = MT_EXACT;
316185029Spjd
317185029Spjd	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
318185029Spjd	    value, mt, NULL, 0, NULL);
319185029Spjd	if (err == ENOTSUP && mt == MT_FIRST)
320185029Spjd		err = zap_lookup(mos, snapobj, name, 8, 1, value);
321185029Spjd	return (err);
322185029Spjd}
323185029Spjd
324248571Smmint
325265744Sdelphijdsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
326265744Sdelphij    boolean_t adj_cnt)
327185029Spjd{
328185029Spjd	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
329185029Spjd	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
330185029Spjd	matchtype_t mt;
331185029Spjd	int err;
332185029Spjd
333219089Spjd	dsl_dir_snap_cmtime_update(ds->ds_dir);
334219089Spjd
335185029Spjd	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
336185029Spjd		mt = MT_FIRST;
337185029Spjd	else
338185029Spjd		mt = MT_EXACT;
339185029Spjd
340185029Spjd	err = zap_remove_norm(mos, snapobj, name, mt, tx);
341185029Spjd	if (err == ENOTSUP && mt == MT_FIRST)
342185029Spjd		err = zap_remove(mos, snapobj, name, tx);
343265744Sdelphij
344265744Sdelphij	if (err == 0 && adj_cnt)
345265744Sdelphij		dsl_fs_ss_count_adjust(ds->ds_dir, -1,
346265744Sdelphij		    DD_FIELD_SNAPSHOT_COUNT, tx);
347265744Sdelphij
348185029Spjd	return (err);
349185029Spjd}
350185029Spjd
/*
 * Hold the dataset whose MOS object number is dsobj and return its in-core
 * dsl_dataset_t in *dsp.  The pool configuration must be held.
 *
 * The hold is a bonus-buffer hold on dsobj taken with `tag`.  If the
 * buffer has no dsl_dataset_t attached yet, one is built here and attached
 * with dmu_buf_set_user_ie(); if that call returns an already attached
 * dataset, the freshly built one is discarded and the returned one is used.
 *
 * Returns 0 on success.  Returns EINVAL if the object's bonus type is not
 * DMU_OT_DSL_DATASET, or the error from any step that failed; in every
 * error case the buffer hold is released and *dsp is not written.
 */
351248571Smmint
352248571Smmdsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
353185029Spjd    dsl_dataset_t **dsp)
354185029Spjd{
355168404Spjd	objset_t *mos = dp->dp_meta_objset;
356168404Spjd	dmu_buf_t *dbuf;
357168404Spjd	dsl_dataset_t *ds;
358168404Spjd	int err;
359219089Spjd	dmu_object_info_t doi;
360168404Spjd
361248571Smm	ASSERT(dsl_pool_config_held(dp));
362168404Spjd
363168404Spjd	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
364248571Smm	if (err != 0)
365168404Spjd		return (err);
366219089Spjd
367219089Spjd	/* Make sure dsobj has the correct object type. */
368219089Spjd	dmu_object_info_from_db(dbuf, &doi);
369263390Sdelphij	if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
370251632Sdelphij		dmu_buf_rele(dbuf, tag);
371249195Smm		return (SET_ERROR(EINVAL));
372251632Sdelphij	}
373219089Spjd
374168404Spjd	ds = dmu_buf_get_user(dbuf);
	/* No in-core dataset attached to this buffer yet: build one. */
375168404Spjd	if (ds == NULL) {
376247187Smm		dsl_dataset_t *winner = NULL;
377168404Spjd
378168404Spjd		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
379168404Spjd		ds->ds_dbuf = dbuf;
380168404Spjd		ds->ds_object = dsobj;
381168404Spjd		ds->ds_phys = dbuf->db_data;
382168404Spjd
383168404Spjd		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
384185029Spjd		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
385235222Smm		mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
386248571Smm		refcount_create(&ds->ds_longholds);
387235222Smm
388219089Spjd		bplist_create(&ds->ds_pending_deadlist);
389219089Spjd		dsl_deadlist_open(&ds->ds_deadlist,
390168404Spjd		    mos, ds->ds_phys->ds_deadlist_obj);
391219089Spjd
392235222Smm		list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
393235222Smm		    offsetof(dmu_sendarg_t, dsa_link));
394235222Smm
		/*
		 * err is still 0 here (a nonzero value returned above), so
		 * the directory hold is always attempted.  The new dataset
		 * itself is used as the hold tag.
		 */
395168404Spjd		if (err == 0) {
396248571Smm			err = dsl_dir_hold_obj(dp,
397168404Spjd			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
398168404Spjd		}
		/* Directory hold failed: undo what was set up and bail out. */
399248571Smm		if (err != 0) {
400168404Spjd			mutex_destroy(&ds->ds_lock);
401185029Spjd			mutex_destroy(&ds->ds_opening_lock);
402269218Sdelphij			mutex_destroy(&ds->ds_sendstream_lock);
403248571Smm			refcount_destroy(&ds->ds_longholds);
404219089Spjd			bplist_destroy(&ds->ds_pending_deadlist);
405219089Spjd			dsl_deadlist_close(&ds->ds_deadlist);
406168404Spjd			kmem_free(ds, sizeof (dsl_dataset_t));
407168404Spjd			dmu_buf_rele(dbuf, tag);
408168404Spjd			return (err);
409168404Spjd		}
410168404Spjd
411185029Spjd		if (!dsl_dataset_is_snapshot(ds)) {
412168404Spjd			ds->ds_snapname[0] = '\0';
			/* Hold the previous snapshot, tagged with ds itself. */
413248571Smm			if (ds->ds_phys->ds_prev_snap_obj != 0) {
414248571Smm				err = dsl_dataset_hold_obj(dp,
415185029Spjd				    ds->ds_phys->ds_prev_snap_obj,
416185029Spjd				    ds, &ds->ds_prev);
417168404Spjd			}
			/*
			 * For ZAP-typed dataset objects, load the bookmark
			 * ZAP object number.  A missing entry (ENOENT) is
			 * tolerated; any other error is fatal.
			 */
418263407Sdelphij			if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
419263407Sdelphij				int zaperr = zap_lookup(mos, ds->ds_object,
420263407Sdelphij				    DS_FIELD_BOOKMARK_NAMES,
421263407Sdelphij				    sizeof (ds->ds_bookmarks), 1,
422263407Sdelphij				    &ds->ds_bookmarks);
423263407Sdelphij				if (zaperr != ENOENT)
424263407Sdelphij					VERIFY0(zaperr);
425263407Sdelphij			}
426219089Spjd		} else {
427219089Spjd			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
428219089Spjd				err = dsl_dataset_get_snapname(ds);
429219089Spjd			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
430219089Spjd				err = zap_count(
431219089Spjd				    ds->ds_dir->dd_pool->dp_meta_objset,
432219089Spjd				    ds->ds_phys->ds_userrefs_obj,
433219089Spjd				    &ds->ds_userrefs);
434168404Spjd			}
435168404Spjd		}
436168404Spjd
		/* Non-snapshots cache their refreservation and refquota. */
437185029Spjd		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
438248571Smm			err = dsl_prop_get_int_ds(ds,
439248571Smm			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
440248571Smm			    &ds->ds_reserved);
441185029Spjd			if (err == 0) {
442248571Smm				err = dsl_prop_get_int_ds(ds,
443248571Smm				    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
444248571Smm				    &ds->ds_quota);
445185029Spjd			}
446185029Spjd		} else {
			/*
			 * Also reached when err != 0; in that case ds is
			 * freed just below.
			 */
447185029Spjd			ds->ds_reserved = ds->ds_quota = 0;
448185029Spjd		}
449185029Spjd
		/*
		 * Attach ds to the buffer unless an earlier step failed.
		 * On failure, or when an already attached dataset (winner)
		 * is returned, tear down the one built here.
		 */
450247187Smm		if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
451247187Smm		    &ds->ds_phys, dsl_dataset_evict)) != NULL) {
452219089Spjd			bplist_destroy(&ds->ds_pending_deadlist);
453219089Spjd			dsl_deadlist_close(&ds->ds_deadlist);
454185029Spjd			if (ds->ds_prev)
455248571Smm				dsl_dataset_rele(ds->ds_prev, ds);
456248571Smm			dsl_dir_rele(ds->ds_dir, ds);
457168404Spjd			mutex_destroy(&ds->ds_lock);
458185029Spjd			mutex_destroy(&ds->ds_opening_lock);
459269218Sdelphij			mutex_destroy(&ds->ds_sendstream_lock);
460248571Smm			refcount_destroy(&ds->ds_longholds);
461168404Spjd			kmem_free(ds, sizeof (dsl_dataset_t));
462248571Smm			if (err != 0) {
463168404Spjd				dmu_buf_rele(dbuf, tag);
464168404Spjd				return (err);
465168404Spjd			}
466168404Spjd			ds = winner;
467168404Spjd		} else {
			/* Our dataset was attached: register its fsid guid. */
468185029Spjd			ds->ds_fsid_guid =
469168404Spjd			    unique_insert(ds->ds_phys->ds_fsid_guid);
470168404Spjd		}
471168404Spjd	}
472168404Spjd	ASSERT3P(ds->ds_dbuf, ==, dbuf);
473168404Spjd	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
474185029Spjd	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
475185029Spjd	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
476185029Spjd	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
477168404Spjd	*dsp = ds;
478168404Spjd	return (0);
479168404Spjd}
480168404Spjd
481168404Spjdint
482248571Smmdsl_dataset_hold(dsl_pool_t *dp, const char *name,
483219089Spjd    void *tag, dsl_dataset_t **dsp)
484185029Spjd{
485168404Spjd	dsl_dir_t *dd;
486185029Spjd	const char *snapname;
487168404Spjd	uint64_t obj;
488168404Spjd	int err = 0;
489168404Spjd
490248571Smm	err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
491248571Smm	if (err != 0)
492168404Spjd		return (err);
493168404Spjd
494248571Smm	ASSERT(dsl_pool_config_held(dp));
495168404Spjd	obj = dd->dd_phys->dd_head_dataset_obj;
496248571Smm	if (obj != 0)
497248571Smm		err = dsl_dataset_hold_obj(dp, obj, tag, dsp);
498185029Spjd	else
499249195Smm		err = SET_ERROR(ENOENT);
500168404Spjd
501185029Spjd	/* we may be looking for a snapshot */
502185029Spjd	if (err == 0 && snapname != NULL) {
503248571Smm		dsl_dataset_t *ds;
504168404Spjd
505185029Spjd		if (*snapname++ != '@') {
506185029Spjd			dsl_dataset_rele(*dsp, tag);
507248571Smm			dsl_dir_rele(dd, FTAG);
508249195Smm			return (SET_ERROR(ENOENT));
509168404Spjd		}
510168404Spjd
511185029Spjd		dprintf("looking for snapshot '%s'\n", snapname);
512185029Spjd		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
513185029Spjd		if (err == 0)
514248571Smm			err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
515185029Spjd		dsl_dataset_rele(*dsp, tag);
516185029Spjd
517248571Smm		if (err == 0) {
518185029Spjd			mutex_enter(&ds->ds_lock);
519185029Spjd			if (ds->ds_snapname[0] == 0)
520185029Spjd				(void) strlcpy(ds->ds_snapname, snapname,
521185029Spjd				    sizeof (ds->ds_snapname));
522185029Spjd			mutex_exit(&ds->ds_lock);
523248571Smm			*dsp = ds;
524168404Spjd		}
525168404Spjd	}
526248571Smm
527248571Smm	dsl_dir_rele(dd, FTAG);
528168404Spjd	return (err);
529168404Spjd}
530168404Spjd
531168404Spjdint
532248571Smmdsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
533219089Spjd    void *tag, dsl_dataset_t **dsp)
534168404Spjd{
535248571Smm	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
536248571Smm	if (err != 0)
537185029Spjd		return (err);
538248571Smm	if (!dsl_dataset_tryown(*dsp, tag)) {
539219089Spjd		dsl_dataset_rele(*dsp, tag);
540248571Smm		*dsp = NULL;
541249195Smm		return (SET_ERROR(EBUSY));
542185029Spjd	}
543185029Spjd	return (0);
544168404Spjd}
545168404Spjd
546248571Smmint
547248571Smmdsl_dataset_own(dsl_pool_t *dp, const char *name,
548248571Smm    void *tag, dsl_dataset_t **dsp)
549248571Smm{
550248571Smm	int err = dsl_dataset_hold(dp, name, tag, dsp);
551248571Smm	if (err != 0)
552248571Smm		return (err);
553248571Smm	if (!dsl_dataset_tryown(*dsp, tag)) {
554248571Smm		dsl_dataset_rele(*dsp, tag);
555249195Smm		return (SET_ERROR(EBUSY));
556248571Smm	}
557248571Smm	return (0);
558248571Smm}
559248571Smm
560248571Smm/*
561248571Smm * See the comment above dsl_pool_hold() for details.  In summary, a long
562248571Smm * hold is used to prevent destruction of a dataset while the pool hold
563248571Smm * is dropped, allowing other concurrent operations (e.g. spa_sync()).
564248571Smm *
565248571Smm * The dataset and pool must be held when this function is called.  After it
566248571Smm * is called, the pool hold may be released while the dataset is still held
567248571Smm * and accessed.
568248571Smm */
569168404Spjdvoid
570248571Smmdsl_dataset_long_hold(dsl_dataset_t *ds, void *tag)
571248571Smm{
572248571Smm	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
573248571Smm	(void) refcount_add(&ds->ds_longholds, tag);
574248571Smm}
575248571Smm
576248571Smmvoid
577248571Smmdsl_dataset_long_rele(dsl_dataset_t *ds, void *tag)
578248571Smm{
579248571Smm	(void) refcount_remove(&ds->ds_longholds, tag);
580248571Smm}
581248571Smm
582248571Smm/* Return B_TRUE if there are any long holds on this dataset. */
583248571Smmboolean_t
584248571Smmdsl_dataset_long_held(dsl_dataset_t *ds)
585248571Smm{
586248571Smm	return (!refcount_is_zero(&ds->ds_longholds));
587248571Smm}
588248571Smm
589248571Smmvoid
590168404Spjddsl_dataset_name(dsl_dataset_t *ds, char *name)
591168404Spjd{
592168404Spjd	if (ds == NULL) {
593168404Spjd		(void) strcpy(name, "mos");
594168404Spjd	} else {
595168404Spjd		dsl_dir_name(ds->ds_dir, name);
596248571Smm		VERIFY0(dsl_dataset_get_snapname(ds));
597168404Spjd		if (ds->ds_snapname[0]) {
598168404Spjd			(void) strcat(name, "@");
599185029Spjd			/*
600185029Spjd			 * We use a "recursive" mutex so that we
601185029Spjd			 * can call dprintf_ds() with ds_lock held.
602185029Spjd			 */
603168404Spjd			if (!MUTEX_HELD(&ds->ds_lock)) {
604168404Spjd				mutex_enter(&ds->ds_lock);
605168404Spjd				(void) strcat(name, ds->ds_snapname);
606168404Spjd				mutex_exit(&ds->ds_lock);
607168404Spjd			} else {
608168404Spjd				(void) strcat(name, ds->ds_snapname);
609168404Spjd			}
610168404Spjd		}
611168404Spjd	}
612168404Spjd}
613168404Spjd
614168404Spjdvoid
615248571Smmdsl_dataset_rele(dsl_dataset_t *ds, void *tag)
616168404Spjd{
617185029Spjd	dmu_buf_rele(ds->ds_dbuf, tag);
618185029Spjd}
619185029Spjd
620185029Spjdvoid
621219089Spjddsl_dataset_disown(dsl_dataset_t *ds, void *tag)
622185029Spjd{
623248571Smm	ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL);
624185029Spjd
625168404Spjd	mutex_enter(&ds->ds_lock);
626185029Spjd	ds->ds_owner = NULL;
627168404Spjd	mutex_exit(&ds->ds_lock);
628248571Smm	dsl_dataset_long_rele(ds, tag);
629248571Smm	if (ds->ds_dbuf != NULL)
630248571Smm		dsl_dataset_rele(ds, tag);
631185029Spjd	else
632219089Spjd		dsl_dataset_evict(NULL, ds);
633185029Spjd}
634168404Spjd
635185029Spjdboolean_t
636248571Smmdsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
637185029Spjd{
638185029Spjd	boolean_t gotit = FALSE;
639185029Spjd
640185029Spjd	mutex_enter(&ds->ds_lock);
641248571Smm	if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
642219089Spjd		ds->ds_owner = tag;
643248571Smm		dsl_dataset_long_hold(ds, tag);
644185029Spjd		gotit = TRUE;
645185029Spjd	}
646185029Spjd	mutex_exit(&ds->ds_lock);
647185029Spjd	return (gotit);
648168404Spjd}
649168404Spjd
/*
 * Allocate and initialize a new dataset object in the MOS and make it the
 * head dataset of directory dd.  Must be called in syncing context, and
 * dd must not have a head dataset yet.
 *
 * If origin is NULL the pool's dp_origin_snap is used instead.  With no
 * origin at all the dataset gets a fresh dead list; otherwise it is set up
 * as a clone of origin: its space counters, block pointer and flags are
 * copied from origin, origin's child count is incremented, the dead list
 * is cloned from origin's head dataset, and the clone is recorded in
 * origin's next-clones and directory-clones ZAPs when the pool version
 * supports them.
 *
 * Returns the object number of the new dataset.
 */
650185029Spjduint64_t
651185029Spjddsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
652185029Spjd    uint64_t flags, dmu_tx_t *tx)
653185029Spjd{
654185029Spjd	dsl_pool_t *dp = dd->dd_pool;
655168404Spjd	dmu_buf_t *dbuf;
656168404Spjd	dsl_dataset_phys_t *dsphys;
657168404Spjd	uint64_t dsobj;
658185029Spjd	objset_t *mos = dp->dp_meta_objset;
659168404Spjd
	/* Fall back to the pool-wide origin snapshot (may also be NULL). */
660185029Spjd	if (origin == NULL)
661185029Spjd		origin = dp->dp_origin_snap;
662168404Spjd
663185029Spjd	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
664185029Spjd	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
665185029Spjd	ASSERT(dmu_tx_is_syncing(tx));
666185029Spjd	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
667185029Spjd
668168404Spjd	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
669168404Spjd	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
670248571Smm	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
671168404Spjd	dmu_buf_will_dirty(dbuf, tx);
672168404Spjd	dsphys = dbuf->db_data;
673185029Spjd	bzero(dsphys, sizeof (dsl_dataset_phys_t));
674168404Spjd	dsphys->ds_dir_obj = dd->dd_object;
675185029Spjd	dsphys->ds_flags = flags;
676168404Spjd	dsphys->ds_fsid_guid = unique_create();
	/* Draw random guids until a nonzero one comes up. */
677236823Spjd	do {
678236823Spjd		(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
679236823Spjd		    sizeof (dsphys->ds_guid));
680236823Spjd	} while (dsphys->ds_guid == 0);
681168404Spjd	dsphys->ds_snapnames_zapobj =
682185029Spjd	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
683185029Spjd	    DMU_OT_NONE, 0, tx);
684168404Spjd	dsphys->ds_creation_time = gethrestime_sec();
	/* Datasets created in TXG_INITIAL are recorded as created in txg 1. */
685185029Spjd	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
686185029Spjd
687219089Spjd	if (origin == NULL) {
688219089Spjd		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
689219089Spjd	} else {
690248571Smm		dsl_dataset_t *ohds; /* head of the origin snapshot */
691219089Spjd
		/* Start out sharing everything the origin references. */
692185029Spjd		dsphys->ds_prev_snap_obj = origin->ds_object;
693185029Spjd		dsphys->ds_prev_snap_txg =
694185029Spjd		    origin->ds_phys->ds_creation_txg;
695236884Smm		dsphys->ds_referenced_bytes =
696236884Smm		    origin->ds_phys->ds_referenced_bytes;
697185029Spjd		dsphys->ds_compressed_bytes =
698185029Spjd		    origin->ds_phys->ds_compressed_bytes;
699185029Spjd		dsphys->ds_uncompressed_bytes =
700185029Spjd		    origin->ds_phys->ds_uncompressed_bytes;
701185029Spjd		dsphys->ds_bp = origin->ds_phys->ds_bp;
702185029Spjd		dsphys->ds_flags |= origin->ds_phys->ds_flags;
703185029Spjd
704185029Spjd		dmu_buf_will_dirty(origin->ds_dbuf, tx);
705185029Spjd		origin->ds_phys->ds_num_children++;
706185029Spjd
		/* The new dead list is cloned from the origin's head dataset. */
707248571Smm		VERIFY0(dsl_dataset_hold_obj(dp,
708219089Spjd		    origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
709219089Spjd		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
710219089Spjd		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
711219089Spjd		dsl_dataset_rele(ohds, FTAG);
712219089Spjd
		/* Record the clone in the origin's next-clones ZAP. */
713185029Spjd		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
714185029Spjd			if (origin->ds_phys->ds_next_clones_obj == 0) {
715185029Spjd				origin->ds_phys->ds_next_clones_obj =
716185029Spjd				    zap_create(mos,
717185029Spjd				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
718185029Spjd			}
719248571Smm			VERIFY0(zap_add_int(mos,
720248571Smm			    origin->ds_phys->ds_next_clones_obj, dsobj, tx));
721185029Spjd		}
722185029Spjd
723185029Spjd		dmu_buf_will_dirty(dd->dd_dbuf, tx);
724185029Spjd		dd->dd_phys->dd_origin_obj = origin->ds_object;
		/* Record the clone in the origin directory's clones ZAP. */
725219089Spjd		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
726219089Spjd			if (origin->ds_dir->dd_phys->dd_clones == 0) {
727219089Spjd				dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
728219089Spjd				origin->ds_dir->dd_phys->dd_clones =
729219089Spjd				    zap_create(mos,
730219089Spjd				    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
731219089Spjd			}
732248571Smm			VERIFY0(zap_add_int(mos,
733219089Spjd			    origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
734219089Spjd		}
735185029Spjd	}
736185029Spjd
737185029Spjd	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
738185029Spjd		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
739185029Spjd
740168404Spjd	dmu_buf_rele(dbuf, FTAG);
741168404Spjd
	/* Finally make the new dataset the directory's head dataset. */
742168404Spjd	dmu_buf_will_dirty(dd->dd_dbuf, tx);
743168404Spjd	dd->dd_phys->dd_head_dataset_obj = dsobj;
744168404Spjd
745185029Spjd	return (dsobj);
746168404Spjd}
747168404Spjd
748248571Smmstatic void
749248571Smmdsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
750248571Smm{
751248571Smm	objset_t *os;
752248571Smm
753248571Smm	VERIFY0(dmu_objset_from_ds(ds, &os));
754248571Smm	bzero(&os->os_zil_header, sizeof (os->os_zil_header));
755248571Smm	dsl_dataset_dirty(ds, tx);
756248571Smm}
757248571Smm
758168404Spjduint64_t
759185029Spjddsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
760185029Spjd    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
761168404Spjd{
762168404Spjd	dsl_pool_t *dp = pdd->dd_pool;
763168404Spjd	uint64_t dsobj, ddobj;
764168404Spjd	dsl_dir_t *dd;
765168404Spjd
766248571Smm	ASSERT(dmu_tx_is_syncing(tx));
767168404Spjd	ASSERT(lastname[0] != '@');
768168404Spjd
769185029Spjd	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
770248571Smm	VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
771168404Spjd
772248571Smm	dsobj = dsl_dataset_create_sync_dd(dd, origin,
773248571Smm	    flags & ~DS_CREATE_FLAG_NODIRTY, tx);
774168404Spjd
775185029Spjd	dsl_deleg_set_create_perms(dd, tx, cr);
776168404Spjd
777265744Sdelphij	/*
778265744Sdelphij	 * Since we're creating a new node we know it's a leaf, so we can
779265744Sdelphij	 * initialize the counts if the limit feature is active.
780265744Sdelphij	 */
781265744Sdelphij	if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
782265744Sdelphij		uint64_t cnt = 0;
783265744Sdelphij		objset_t *os = dd->dd_pool->dp_meta_objset;
784265744Sdelphij
785265744Sdelphij		dsl_dir_zapify(dd, tx);
786265744Sdelphij		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
787265744Sdelphij		    sizeof (cnt), 1, &cnt, tx));
788265744Sdelphij		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
789265744Sdelphij		    sizeof (cnt), 1, &cnt, tx));
790265744Sdelphij	}
791265744Sdelphij
792248571Smm	dsl_dir_rele(dd, FTAG);
793168404Spjd
794219089Spjd	/*
795219089Spjd	 * If we are creating a clone, make sure we zero out any stale
796219089Spjd	 * data from the origin snapshots zil header.
797219089Spjd	 */
798248571Smm	if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {
799219089Spjd		dsl_dataset_t *ds;
800219089Spjd
801248571Smm		VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
802248571Smm		dsl_dataset_zero_zil(ds, tx);
803219089Spjd		dsl_dataset_rele(ds, FTAG);
804219089Spjd	}
805219089Spjd
806168404Spjd	return (dsobj);
807168404Spjd}
808168404Spjd
809228103Smm#ifdef __FreeBSD__
810228103Smm/* FreeBSD ioctl compat begin */
/*
 * Argument bundle passed to dsl_check_snap_cb() by way of
 * dmu_objset_find().
 */
struct destroyarg {
	nvlist_t *nvl;		/* receives one "<name>@<snapname>" entry */
	const char *snapname;	/* snapshot component appended to each name */
};
815168404Spjd
816168404Spjdstatic int
817228103Smmdsl_check_snap_cb(const char *name, void *arg)
818168404Spjd{
819168404Spjd	struct destroyarg *da = arg;
820168404Spjd	dsl_dataset_t *ds;
821219089Spjd	char *dsname;
822168404Spjd
823219089Spjd	dsname = kmem_asprintf("%s@%s", name, da->snapname);
824248493Smm	fnvlist_add_boolean(da->nvl, dsname);
825248493Smm	kmem_free(dsname, strlen(dsname) + 1);
826219089Spjd
827228103Smm	return (0);
828228103Smm}
829228103Smm
830228103Smmint
831248571Smmdmu_get_recursive_snaps_nvl(char *fsname, const char *snapname,
832228103Smm    nvlist_t *snaps)
833228103Smm{
834228103Smm	struct destroyarg *da;
835228103Smm	int err;
836228103Smm
837228103Smm	da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP);
838228103Smm	da->nvl = snaps;
839228103Smm	da->snapname = snapname;
840228103Smm	err = dmu_objset_find(fsname, dsl_check_snap_cb, da,
841228103Smm	    DS_FIND_CHILDREN);
842228103Smm	kmem_free(da, sizeof (struct destroyarg));
843228103Smm
844185029Spjd	return (err);
845168404Spjd}
846228103Smm/* FreeBSD ioctl compat end */
847228103Smm#endif /* __FreeBSD__ */
848168404Spjd
849168404Spjd/*
850185029Spjd * The unique space in the head dataset can be calculated by subtracting
851185029Spjd * the space used in the most recent snapshot, that is still being used
852185029Spjd * in this file system, from the space currently in use.  To figure out
853185029Spjd * the space in the most recent snapshot still in use, we need to take
854185029Spjd * the total space used in the snapshot and subtract out the space that
855185029Spjd * has been freed up since the snapshot was taken.
856185029Spjd */
857248571Smmvoid
858185029Spjddsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
859185029Spjd{
860185029Spjd	uint64_t mrs_used;
861185029Spjd	uint64_t dlused, dlcomp, dluncomp;
862185029Spjd
863219089Spjd	ASSERT(!dsl_dataset_is_snapshot(ds));
864185029Spjd
865185029Spjd	if (ds->ds_phys->ds_prev_snap_obj != 0)
866236884Smm		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
867185029Spjd	else
868185029Spjd		mrs_used = 0;
869185029Spjd
870219089Spjd	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
871185029Spjd
872185029Spjd	ASSERT3U(dlused, <=, mrs_used);
873185029Spjd	ds->ds_phys->ds_unique_bytes =
874236884Smm	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
875185029Spjd
876219089Spjd	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
877185029Spjd	    SPA_VERSION_UNIQUE_ACCURATE)
878185029Spjd		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
879185029Spjd}
880185029Spjd
881248571Smmvoid
882248571Smmdsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
883219089Spjd    dmu_tx_t *tx)
884219089Spjd{
885209962Smm	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
886209962Smm	uint64_t count;
887209962Smm	int err;
888209962Smm
889209962Smm	ASSERT(ds->ds_phys->ds_num_children >= 2);
890209962Smm	err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
891209962Smm	/*
892209962Smm	 * The err should not be ENOENT, but a bug in a previous version
893209962Smm	 * of the code could cause upgrade_clones_cb() to not set
894209962Smm	 * ds_next_snap_obj when it should, leading to a missing entry.
895209962Smm	 * If we knew that the pool was created after
896209962Smm	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
897209962Smm	 * ENOENT.  However, at least we can check that we don't have
898209962Smm	 * too many entries in the next_clones_obj even after failing to
899209962Smm	 * remove this one.
900209962Smm	 */
901248571Smm	if (err != ENOENT)
902240415Smm		VERIFY0(err);
903248571Smm	ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
904209962Smm	    &count));
905209962Smm	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
906209962Smm}
907209962Smm
908248571Smm
909248571Smmblkptr_t *
910248571Smmdsl_dataset_get_blkptr(dsl_dataset_t *ds)
911219089Spjd{
912248571Smm	return (&ds->ds_phys->ds_bp);
913219089Spjd}
914219089Spjd
915248571Smmvoid
916248571Smmdsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
917219089Spjd{
918248571Smm	ASSERT(dmu_tx_is_syncing(tx));
919248571Smm	/* If it's the meta-objset, set dp_meta_rootbp */
920248571Smm	if (ds == NULL) {
921248571Smm		tx->tx_pool->dp_meta_rootbp = *bp;
922219089Spjd	} else {
923248571Smm		dmu_buf_will_dirty(ds->ds_dbuf, tx);
924248571Smm		ds->ds_phys->ds_bp = *bp;
925219089Spjd	}
926219089Spjd}
927219089Spjd
928248571Smmspa_t *
929248571Smmdsl_dataset_get_spa(dsl_dataset_t *ds)
930219089Spjd{
931248571Smm	return (ds->ds_dir->dd_pool->dp_spa);
932219089Spjd}
933219089Spjd
934185029Spjdvoid
935248571Smmdsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
936185029Spjd{
937248571Smm	dsl_pool_t *dp;
938168404Spjd
939248571Smm	if (ds == NULL) /* this is the meta-objset */
940219089Spjd		return;
941219089Spjd
942248571Smm	ASSERT(ds->ds_objset != NULL);
943185029Spjd
944248571Smm	if (ds->ds_phys->ds_next_snap_obj != 0)
945248571Smm		panic("dirtying snapshot!");
946219089Spjd
947248571Smm	dp = ds->ds_dir->dd_pool;
948219089Spjd
949248571Smm	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
950248571Smm		/* up the hold count until we can be written out */
951248571Smm		dmu_buf_add_ref(ds->ds_dbuf, ds);
952185029Spjd	}
953248571Smm}
954185029Spjd
955248571Smmboolean_t
956248571Smmdsl_dataset_is_dirty(dsl_dataset_t *ds)
957248571Smm{
958248571Smm	for (int t = 0; t < TXG_SIZE; t++) {
959248571Smm		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
960248571Smm		    ds, t))
961248571Smm			return (B_TRUE);
962168404Spjd	}
963248571Smm	return (B_FALSE);
964185029Spjd}
965168404Spjd
966185029Spjdstatic int
967185029Spjddsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
968185029Spjd{
969185029Spjd	uint64_t asize;
970185029Spjd
971185029Spjd	if (!dmu_tx_is_syncing(tx))
972185029Spjd		return (0);
973185029Spjd
974185029Spjd	/*
975185029Spjd	 * If there's an fs-only reservation, any blocks that might become
976185029Spjd	 * owned by the snapshot dataset must be accommodated by space
977185029Spjd	 * outside of the reservation.
978185029Spjd	 */
979219089Spjd	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
980219089Spjd	asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
981219089Spjd	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
982249195Smm		return (SET_ERROR(ENOSPC));
983185029Spjd
984185029Spjd	/*
985248571Smm	 * Propagate any reserved space for this snapshot to other
986185029Spjd	 * snapshot checks in this sync group.
987185029Spjd	 */
988185029Spjd	if (asize > 0)
989185029Spjd		dsl_dir_willuse_space(ds->ds_dir, asize, tx);
990185029Spjd
991185029Spjd	return (0);
992168404Spjd}
993168404Spjd
/*
 * Argument shared by dsl_dataset_snapshot_check() and
 * dsl_dataset_snapshot_sync().
 */
typedef struct dsl_dataset_snapshot_arg {
	nvlist_t *ddsa_snaps;	/* pair names are "<dataset>@<snapname>" */
	nvlist_t *ddsa_props;	/* set on each new snapshot; may be NULL */
	nvlist_t *ddsa_errors;	/* per-name int32 errors; may be NULL */
	cred_t *ddsa_cr;	/* creds used for the snapshot limit check */
} dsl_dataset_snapshot_arg_t;
1000248571Smm
1001168404Spjdint
1002248571Smmdsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
1003265744Sdelphij    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
1004168404Spjd{
1005248571Smm	int error;
1006168404Spjd	uint64_t value;
1007168404Spjd
1008248571Smm	ds->ds_trysnap_txg = tx->tx_txg;
1009248571Smm
1010248571Smm	if (!dmu_tx_is_syncing(tx))
1011248571Smm		return (0);
1012248571Smm
1013168404Spjd	/*
1014168404Spjd	 * We don't allow multiple snapshots of the same txg.  If there
1015168404Spjd	 * is already one, try again.
1016168404Spjd	 */
1017168404Spjd	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
1018249195Smm		return (SET_ERROR(EAGAIN));
1019168404Spjd
1020168404Spjd	/*
1021248571Smm	 * Check for conflicting snapshot name.
1022168404Spjd	 */
1023248571Smm	error = dsl_dataset_snap_lookup(ds, snapname, &value);
1024248571Smm	if (error == 0)
1025249195Smm		return (SET_ERROR(EEXIST));
1026248571Smm	if (error != ENOENT)
1027248571Smm		return (error);
1028168404Spjd
1029253819Sdelphij	/*
1030253819Sdelphij	 * We don't allow taking snapshots of inconsistent datasets, such as
1031253819Sdelphij	 * those into which we are currently receiving.  However, if we are
1032253819Sdelphij	 * creating this snapshot as part of a receive, this check will be
1033253819Sdelphij	 * executed atomically with respect to the completion of the receive
1034253819Sdelphij	 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this
1035253819Sdelphij	 * case we ignore this, knowing it will be fixed up for us shortly in
1036253819Sdelphij	 * dmu_recv_end_sync().
1037253819Sdelphij	 */
1038253819Sdelphij	if (!recv && DS_IS_INCONSISTENT(ds))
1039253819Sdelphij		return (SET_ERROR(EBUSY));
1040253819Sdelphij
1041265744Sdelphij	/*
1042265744Sdelphij	 * Skip the check for temporary snapshots or if we have already checked
1043265744Sdelphij	 * the counts in dsl_dataset_snapshot_check. This means we really only
1044265744Sdelphij	 * check the count here when we're receiving a stream.
1045265744Sdelphij	 */
1046265744Sdelphij	if (cnt != 0 && cr != NULL) {
1047265744Sdelphij		error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
1048265744Sdelphij		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
1049265744Sdelphij		if (error != 0)
1050265744Sdelphij			return (error);
1051265744Sdelphij	}
1052265744Sdelphij
1053248571Smm	error = dsl_dataset_snapshot_reserve_space(ds, tx);
1054248571Smm	if (error != 0)
1055248571Smm		return (error);
1056168498Spjd
1057168404Spjd	return (0);
1058168404Spjd}
1059168404Spjd
/*
 * Check function of the snapshot sync task; 'arg' is a
 * dsl_dataset_snapshot_arg_t.
 *
 * In syncing context the snapshot limit is validated first, using counts
 * aggregated over every ancestor of the requested snapshots.  Then every
 * requested name is validated individually.  Failures are recorded in
 * ddsa_errors (when non-NULL) keyed by name, and the last error seen is
 * returned; 0 means every check passed.
 */
static int
dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	/*
	 * Pre-compute how many total new snapshots will be created for each
	 * level in the tree and below. This is needed for validating the
	 * snapshot limit when either taking a recursive snapshot or when
	 * taking multiple snapshots.
	 *
	 * The problem is that the counts are not actually adjusted when
	 * we are checking, only when we finally sync. For a single snapshot,
	 * this is easy, the count will increase by 1 at each node up the tree,
	 * but it's more complicated for the recursive/multiple snapshot case.
	 *
	 * The dsl_fs_ss_limit_check function does recursively check the count
	 * at each level up the tree but since it is validating each snapshot
	 * independently we need to be sure that we are validating the complete
	 * count for the entire set of snapshots. We do this by rolling up the
	 * counts for each component of the name into an nvlist and then
	 * checking each of those cases with the aggregated count.
	 *
	 * This approach properly handles not only the recursive snapshot
	 * case (where we get all of those on the ddsa_snaps list) but also
	 * the sibling case (e.g. snapshot a/b and a/c so that we will also
	 * validate the limit on 'a' using a count of 2).
	 *
	 * We validate the snapshot names in the third loop and only report
	 * name errors once.
	 */
	if (dmu_tx_is_syncing(tx)) {
		nvlist_t *cnt_track = NULL;
		cnt_track = fnvlist_alloc();

		/* Rollup aggregated counts into the cnt_track list */
		for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
		    pair != NULL;
		    pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
			char *pdelim;
			uint64_t val;
			char nm[MAXPATHLEN];

			/* Work on a private copy; it is truncated in place. */
			(void) strlcpy(nm, nvpair_name(pair), sizeof (nm));
			pdelim = strchr(nm, '@');
			/* Names without '@' are reported by the third loop. */
			if (pdelim == NULL)
				continue;
			*pdelim = '\0';

			/*
			 * Count this snapshot against its dataset and then
			 * against each ancestor, stripping one trailing
			 * "/component" per iteration.
			 */
			do {
				if (nvlist_lookup_uint64(cnt_track, nm,
				    &val) == 0) {
					/* update existing entry */
					fnvlist_add_uint64(cnt_track, nm,
					    val + 1);
				} else {
					/* add to list */
					fnvlist_add_uint64(cnt_track, nm, 1);
				}

				pdelim = strrchr(nm, '/');
				if (pdelim != NULL)
					*pdelim = '\0';
			} while (pdelim != NULL);
		}

		/* Check aggregated counts at each level */
		for (pair = nvlist_next_nvpair(cnt_track, NULL);
		    pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) {
			int error = 0;
			char *name;
			uint64_t cnt = 0;
			dsl_dataset_t *ds;

			name = nvpair_name(pair);
			cnt = fnvpair_value_uint64(pair);
			ASSERT(cnt > 0);

			/* A failed hold is reported just like a limit error. */
			error = dsl_dataset_hold(dp, name, FTAG, &ds);
			if (error == 0) {
				error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
				    ZFS_PROP_SNAPSHOT_LIMIT, NULL,
				    ddsa->ddsa_cr);
				dsl_dataset_rele(ds, FTAG);
			}

			if (error != 0) {
				if (ddsa->ddsa_errors != NULL)
					fnvlist_add_int32(ddsa->ddsa_errors,
					    name, error);
				rv = error;
				/* only report one error for this check */
				break;
			}
		}
		nvlist_free(cnt_track);
	}

	/*
	 * Per-snapshot validation.  This loop does not stop at the first
	 * failure, so every offending name can be recorded in ddsa_errors.
	 */
	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		int error = 0;
		dsl_dataset_t *ds;
		char *name, *atp;
		char dsname[MAXNAMELEN];

		name = nvpair_name(pair);
		if (strlen(name) >= MAXNAMELEN)
			error = SET_ERROR(ENAMETOOLONG);
		if (error == 0) {
			atp = strchr(name, '@');
			if (atp == NULL)
				error = SET_ERROR(EINVAL);
			/*
			 * A size of (atp - name + 1) copies exactly the
			 * dataset part in front of '@'; the length check
			 * above guarantees it fits in dsname.
			 */
			if (error == 0)
				(void) strlcpy(dsname, name, atp - name + 1);
		}
		if (error == 0)
			error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
		if (error == 0) {
			/* passing 0/NULL skips dsl_fs_ss_limit_check */
			error = dsl_dataset_snapshot_check_impl(ds,
			    atp + 1, tx, B_FALSE, 0, NULL);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			if (ddsa->ddsa_errors != NULL) {
				fnvlist_add_int32(ddsa->ddsa_errors,
				    name, error);
			}
			rv = error;
		}
	}

	return (rv);
}
1198248571Smm
/*
 * Create the snapshot 'snapname' of 'ds'.
 *
 * A new dataset object is allocated and initialized from the current
 * state of 'ds', linked in between ds's previous snapshot and 'ds',
 * and entered into ds's snapshot-name ZAP.  'ds' itself gets a new
 * deadlist object, its unique bytes are reset to zero, and ds->ds_prev is
 * re-held so that it refers to the new snapshot on return.
 *
 * The caller must hold the pool's config rwlock as writer.
 */
void
dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx)
{
	/* All-zero ZIL header used only for the comparison in the ASSERT. */
	static zil_header_t zero_zil;

	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	objset_t *os;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * If we are on an old pool, the zil must not be active, in which
	 * case it will be zeroed.  Usually zil_suspend() accomplishes this.
	 */
	ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||
	    dmu_objset_from_ds(ds, &os) != 0 ||
	    bcmp(&os->os_phys->os_zil_header, &zero_zil,
	    sizeof (zero_zil)) == 0);

	/* Account for the new snapshot in the directory's snapshot count. */
	dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	/* Allocate the snapshot's dataset object and fill in its phys. */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	/* Draw random guids until a non-zero one comes up. */
	do {
		(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
		    sizeof (dsphys->ds_guid));
	} while (dsphys->ds_guid == 0);
	/* The snapshot slots in between ds's old previous snapshot and ds. */
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	/* The snapshot takes over ds's current deadlist object. */
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	/*
	 * Fix up the old previous snapshot: either its next pointer moves
	 * from ds to the new snapshot, or (when ds is not its direct
	 * successor) its next_clones entry for ds is replaced by one for the
	 * new snapshot.
	 */
	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			/* dsphys->ds_next_snap_obj equals ds->ds_object. */
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    dsphys->ds_next_snap_obj, tx);
			VERIFY0(zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t delta;
		ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
		delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    delta, 0, 0, tx);
	}

	/*
	 * Give ds a new deadlist object obtained from dsl_deadlist_clone()
	 * and reopen the in-core deadlist on it.  This must happen while
	 * ds_prev_snap_obj/txg still hold their old values.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist,
	    UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx);
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_add_key(&ds->ds_deadlist,
	    ds->ds_phys->ds_prev_snap_txg, tx);

	/* Point ds at the new snapshot; nothing is unique to ds any more. */
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	/* Register the snapshot name -> object number mapping. */
	VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx));

	/* Swap the in-core ds_prev hold over to the new snapshot. */
	if (ds->ds_prev)
		dsl_dataset_rele(ds->ds_prev, ds);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_scan_ds_snapshotted(ds, tx);

	dsl_dir_snap_cmtime_update(ds->ds_dir);

	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
}
1321168404Spjd
1322248571Smmstatic void
1323248571Smmdsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
1324248571Smm{
1325248571Smm	dsl_dataset_snapshot_arg_t *ddsa = arg;
1326248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1327248571Smm	nvpair_t *pair;
1328248571Smm
1329248571Smm	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
1330248571Smm	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
1331248571Smm		dsl_dataset_t *ds;
1332248571Smm		char *name, *atp;
1333248571Smm		char dsname[MAXNAMELEN];
1334248571Smm
1335248571Smm		name = nvpair_name(pair);
1336248571Smm		atp = strchr(name, '@');
1337248571Smm		(void) strlcpy(dsname, name, atp - name + 1);
1338248571Smm		VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));
1339248571Smm
1340248571Smm		dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);
1341248571Smm		if (ddsa->ddsa_props != NULL) {
1342248571Smm			dsl_props_set_sync_impl(ds->ds_prev,
1343248571Smm			    ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
1344248571Smm		}
1345248571Smm		dsl_dataset_rele(ds, FTAG);
1346248571Smm	}
1347248571Smm}
1348248571Smm
1349248571Smm/*
1350248571Smm * The snapshots must all be in the same pool.
1351248571Smm * All-or-nothing: if there are any failures, nothing will be modified.
1352248571Smm */
1353248571Smmint
1354248571Smmdsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
1355248571Smm{
1356248571Smm	dsl_dataset_snapshot_arg_t ddsa;
1357248571Smm	nvpair_t *pair;
1358248571Smm	boolean_t needsuspend;
1359248571Smm	int error;
1360248571Smm	spa_t *spa;
1361248571Smm	char *firstname;
1362248571Smm	nvlist_t *suspended = NULL;
1363248571Smm
1364248571Smm	pair = nvlist_next_nvpair(snaps, NULL);
1365248571Smm	if (pair == NULL)
1366248571Smm		return (0);
1367248571Smm	firstname = nvpair_name(pair);
1368248571Smm
1369248571Smm	error = spa_open(firstname, &spa, FTAG);
1370248571Smm	if (error != 0)
1371248571Smm		return (error);
1372248571Smm	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
1373248571Smm	spa_close(spa, FTAG);
1374248571Smm
1375248571Smm	if (needsuspend) {
1376248571Smm		suspended = fnvlist_alloc();
1377248571Smm		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1378248571Smm		    pair = nvlist_next_nvpair(snaps, pair)) {
1379248571Smm			char fsname[MAXNAMELEN];
1380248571Smm			char *snapname = nvpair_name(pair);
1381248571Smm			char *atp;
1382248571Smm			void *cookie;
1383248571Smm
1384248571Smm			atp = strchr(snapname, '@');
1385248571Smm			if (atp == NULL) {
1386249195Smm				error = SET_ERROR(EINVAL);
1387248571Smm				break;
1388248571Smm			}
1389248571Smm			(void) strlcpy(fsname, snapname, atp - snapname + 1);
1390248571Smm
1391248571Smm			error = zil_suspend(fsname, &cookie);
1392248571Smm			if (error != 0)
1393248571Smm				break;
1394248571Smm			fnvlist_add_uint64(suspended, fsname,
1395248571Smm			    (uintptr_t)cookie);
1396248571Smm		}
1397248571Smm	}
1398248571Smm
1399248571Smm	ddsa.ddsa_snaps = snaps;
1400248571Smm	ddsa.ddsa_props = props;
1401248571Smm	ddsa.ddsa_errors = errors;
1402265744Sdelphij	ddsa.ddsa_cr = CRED();
1403248571Smm
1404248571Smm	if (error == 0) {
1405248571Smm		error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
1406248571Smm		    dsl_dataset_snapshot_sync, &ddsa,
1407269006Sdelphij		    fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL);
1408248571Smm	}
1409248571Smm
1410248571Smm	if (suspended != NULL) {
1411248571Smm		for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;
1412248571Smm		    pair = nvlist_next_nvpair(suspended, pair)) {
1413248571Smm			zil_resume((void *)(uintptr_t)
1414248571Smm			    fnvpair_value_uint64(pair));
1415248571Smm		}
1416248571Smm		fnvlist_free(suspended);
1417248571Smm	}
1418248571Smm
1419248571Smm#ifdef __FreeBSD__
1420248571Smm#ifdef _KERNEL
1421248571Smm	if (error == 0) {
1422248571Smm		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1423248571Smm		    pair = nvlist_next_nvpair(snaps, pair)) {
1424248571Smm			char *snapname = nvpair_name(pair);
1425248571Smm			zvol_create_minors(snapname);
1426248571Smm		}
1427248571Smm	}
1428248571Smm#endif
1429248571Smm#endif
1430248571Smm	return (error);
1431248571Smm}
1432248571Smm
/*
 * Argument shared by dsl_dataset_snapshot_tmp_check() and
 * dsl_dataset_snapshot_tmp_sync().
 */
typedef struct dsl_dataset_snapshot_tmp_arg {
	const char *ddsta_fsname;	/* dataset to snapshot */
	const char *ddsta_snapname;	/* name of the temporary snapshot */
	minor_t ddsta_cleanup_minor;	/* passed on to the user hold */
	const char *ddsta_htag;		/* tag of the user hold */
} dsl_dataset_snapshot_tmp_arg_t;
1439248571Smm
1440248571Smmstatic int
1441248571Smmdsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
1442248571Smm{
1443248571Smm	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
1444248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1445248571Smm	dsl_dataset_t *ds;
1446248571Smm	int error;
1447248571Smm
1448248571Smm	error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);
1449248571Smm	if (error != 0)
1450248571Smm		return (error);
1451248571Smm
1452265744Sdelphij	/* NULL cred means no limit check for tmp snapshot */
1453253819Sdelphij	error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,
1454265744Sdelphij	    tx, B_FALSE, 0, NULL);
1455248571Smm	if (error != 0) {
1456248571Smm		dsl_dataset_rele(ds, FTAG);
1457248571Smm		return (error);
1458248571Smm	}
1459248571Smm
1460248571Smm	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) {
1461248571Smm		dsl_dataset_rele(ds, FTAG);
1462249195Smm		return (SET_ERROR(ENOTSUP));
1463248571Smm	}
1464248571Smm	error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,
1465248571Smm	    B_TRUE, tx);
1466248571Smm	if (error != 0) {
1467248571Smm		dsl_dataset_rele(ds, FTAG);
1468248571Smm		return (error);
1469248571Smm	}
1470248571Smm
1471248571Smm	dsl_dataset_rele(ds, FTAG);
1472248571Smm	return (0);
1473248571Smm}
1474248571Smm
1475248571Smmstatic void
1476248571Smmdsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
1477248571Smm{
1478248571Smm	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
1479248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1480248571Smm	dsl_dataset_t *ds;
1481248571Smm
1482248571Smm	VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));
1483248571Smm
1484248571Smm	dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);
1485248571Smm	dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,
1486248571Smm	    ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);
1487248571Smm	dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);
1488248571Smm
1489248571Smm	dsl_dataset_rele(ds, FTAG);
1490248571Smm}
1491248571Smm
1492248571Smmint
1493248571Smmdsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
1494248571Smm    minor_t cleanup_minor, const char *htag)
1495248571Smm{
1496248571Smm	dsl_dataset_snapshot_tmp_arg_t ddsta;
1497248571Smm	int error;
1498248571Smm	spa_t *spa;
1499248571Smm	boolean_t needsuspend;
1500248571Smm	void *cookie;
1501248571Smm
1502248571Smm	ddsta.ddsta_fsname = fsname;
1503248571Smm	ddsta.ddsta_snapname = snapname;
1504248571Smm	ddsta.ddsta_cleanup_minor = cleanup_minor;
1505248571Smm	ddsta.ddsta_htag = htag;
1506248571Smm
1507248571Smm	error = spa_open(fsname, &spa, FTAG);
1508248571Smm	if (error != 0)
1509248571Smm		return (error);
1510248571Smm	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
1511248571Smm	spa_close(spa, FTAG);
1512248571Smm
1513248571Smm	if (needsuspend) {
1514248571Smm		error = zil_suspend(fsname, &cookie);
1515248571Smm		if (error != 0)
1516248571Smm			return (error);
1517248571Smm	}
1518248571Smm
1519248571Smm	error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,
1520269006Sdelphij	    dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED);
1521248571Smm
1522248571Smm	if (needsuspend)
1523248571Smm		zil_resume(cookie);
1524248571Smm	return (error);
1525248571Smm}
1526248571Smm
1527248571Smm
1528168404Spjdvoid
1529168404Spjddsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1530168404Spjd{
1531168404Spjd	ASSERT(dmu_tx_is_syncing(tx));
1532219089Spjd	ASSERT(ds->ds_objset != NULL);
1533168404Spjd	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1534168404Spjd
1535185029Spjd	/*
1536185029Spjd	 * in case we had to change ds_fsid_guid when we opened it,
1537185029Spjd	 * sync it out now.
1538185029Spjd	 */
1539185029Spjd	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1540185029Spjd	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
1541185029Spjd
1542219089Spjd	dmu_objset_sync(ds->ds_objset, zio, tx);
1543168404Spjd}
1544168404Spjd
1545228103Smmstatic void
1546228103Smmget_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
1547228103Smm{
1548228103Smm	uint64_t count = 0;
1549228103Smm	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1550228103Smm	zap_cursor_t zc;
1551228103Smm	zap_attribute_t za;
1552248571Smm	nvlist_t *propval = fnvlist_alloc();
1553248571Smm	nvlist_t *val = fnvlist_alloc();
1554228103Smm
1555248571Smm	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
1556228103Smm
1557228103Smm	/*
1558248571Smm	 * There may be missing entries in ds_next_clones_obj
1559228103Smm	 * due to a bug in a previous version of the code.
1560228103Smm	 * Only trust it if it has the right number of entries.
1561228103Smm	 */
1562228103Smm	if (ds->ds_phys->ds_next_clones_obj != 0) {
1563262320Sdelphij		VERIFY0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
1564228103Smm		    &count));
1565228103Smm	}
1566248571Smm	if (count != ds->ds_phys->ds_num_children - 1)
1567228103Smm		goto fail;
1568228103Smm	for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
1569228103Smm	    zap_cursor_retrieve(&zc, &za) == 0;
1570228103Smm	    zap_cursor_advance(&zc)) {
1571228103Smm		dsl_dataset_t *clone;
1572228103Smm		char buf[ZFS_MAXNAMELEN];
1573248571Smm		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
1574248571Smm		    za.za_first_integer, FTAG, &clone));
1575228103Smm		dsl_dir_name(clone->ds_dir, buf);
1576248571Smm		fnvlist_add_boolean(val, buf);
1577228103Smm		dsl_dataset_rele(clone, FTAG);
1578228103Smm	}
1579228103Smm	zap_cursor_fini(&zc);
1580248571Smm	fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
1581248571Smm	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval);
1582228103Smmfail:
1583228103Smm	nvlist_free(val);
1584228103Smm	nvlist_free(propval);
1585228103Smm}
1586228103Smm
1587168404Spjdvoid
1588168404Spjddsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1589168404Spjd{
1590248571Smm	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1591223623Smm	uint64_t refd, avail, uobjs, aobjs, ratio;
1592185029Spjd
1593248571Smm	ASSERT(dsl_pool_config_held(dp));
1594168404Spjd
1595248571Smm	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
1596248571Smm	    (ds->ds_phys->ds_uncompressed_bytes * 100 /
1597248571Smm	    ds->ds_phys->ds_compressed_bytes);
1598248571Smm
1599248571Smm	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
1600248571Smm	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
1601248571Smm	    ds->ds_phys->ds_uncompressed_bytes);
1602248571Smm
1603248571Smm	if (dsl_dataset_is_snapshot(ds)) {
1604248571Smm		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
1605248571Smm		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1606248571Smm		    ds->ds_phys->ds_unique_bytes);
1607248571Smm		get_clones_stat(ds, nv);
1608248571Smm	} else {
1609268659Sdelphij		if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
1610268659Sdelphij			char buf[MAXNAMELEN];
1611268659Sdelphij			dsl_dataset_name(ds->ds_prev, buf);
1612268659Sdelphij			dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
1613268659Sdelphij		}
1614268659Sdelphij
1615248571Smm		dsl_dir_stats(ds->ds_dir, nv);
1616248571Smm	}
1617248571Smm
1618185029Spjd	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1619185029Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1620185029Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1621185029Spjd
1622168404Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1623168404Spjd	    ds->ds_phys->ds_creation_time);
1624168404Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
1625168404Spjd	    ds->ds_phys->ds_creation_txg);
1626185029Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
1627185029Spjd	    ds->ds_quota);
1628185029Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
1629185029Spjd	    ds->ds_reserved);
1630185029Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
1631185029Spjd	    ds->ds_phys->ds_guid);
1632219089Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
1633219089Spjd	    ds->ds_phys->ds_unique_bytes);
1634219089Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
1635219089Spjd	    ds->ds_object);
1636219089Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
1637219089Spjd	    ds->ds_userrefs);
1638219089Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
1639219089Spjd	    DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
1640168404Spjd
1641228103Smm	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1642228103Smm		uint64_t written, comp, uncomp;
1643228103Smm		dsl_pool_t *dp = ds->ds_dir->dd_pool;
1644228103Smm		dsl_dataset_t *prev;
1645228103Smm
1646228103Smm		int err = dsl_dataset_hold_obj(dp,
1647228103Smm		    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
1648228103Smm		if (err == 0) {
1649228103Smm			err = dsl_dataset_space_written(prev, ds, &written,
1650228103Smm			    &comp, &uncomp);
1651228103Smm			dsl_dataset_rele(prev, FTAG);
1652228103Smm			if (err == 0) {
1653228103Smm				dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
1654228103Smm				    written);
1655228103Smm			}
1656228103Smm		}
1657228103Smm	}
1658168404Spjd}
1659168404Spjd
1660168404Spjdvoid
1661168404Spjddsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1662168404Spjd{
1663248571Smm	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1664248571Smm	ASSERT(dsl_pool_config_held(dp));
1665248571Smm
1666168404Spjd	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1667168404Spjd	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
1668185029Spjd	stat->dds_guid = ds->ds_phys->ds_guid;
1669248571Smm	stat->dds_origin[0] = '\0';
1670248571Smm	if (dsl_dataset_is_snapshot(ds)) {
1671168404Spjd		stat->dds_is_snapshot = B_TRUE;
1672168404Spjd		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1673209962Smm	} else {
1674209962Smm		stat->dds_is_snapshot = B_FALSE;
1675209962Smm		stat->dds_num_clones = 0;
1676168404Spjd
1677248571Smm		if (dsl_dir_is_clone(ds->ds_dir)) {
1678248571Smm			dsl_dataset_t *ods;
1679168404Spjd
1680248571Smm			VERIFY0(dsl_dataset_hold_obj(dp,
1681248571Smm			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
1682248571Smm			dsl_dataset_name(ods, stat->dds_origin);
1683248571Smm			dsl_dataset_rele(ods, FTAG);
1684248571Smm		}
1685168404Spjd	}
1686168404Spjd}
1687168404Spjd
/*
 * Return the in-core fsid guid (ds_fsid_guid) of the dataset.
 */
1688168404Spjduint64_t
1689168404Spjddsl_dataset_fsid_guid(dsl_dataset_t *ds)
1690168404Spjd{
1691185029Spjd	return (ds->ds_fsid_guid);
1692168404Spjd}
1693168404Spjd
1694168404Spjdvoid
1695168404Spjddsl_dataset_space(dsl_dataset_t *ds,
1696168404Spjd    uint64_t *refdbytesp, uint64_t *availbytesp,
1697168404Spjd    uint64_t *usedobjsp, uint64_t *availobjsp)
1698168404Spjd{
1699236884Smm	*refdbytesp = ds->ds_phys->ds_referenced_bytes;
1700168404Spjd	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
1701185029Spjd	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
1702185029Spjd		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
1703185029Spjd	if (ds->ds_quota != 0) {
1704185029Spjd		/*
1705185029Spjd		 * Adjust available bytes according to refquota
1706185029Spjd		 */
1707185029Spjd		if (*refdbytesp < ds->ds_quota)
1708185029Spjd			*availbytesp = MIN(*availbytesp,
1709185029Spjd			    ds->ds_quota - *refdbytesp);
1710185029Spjd		else
1711185029Spjd			*availbytesp = 0;
1712185029Spjd	}
1713268649Sdelphij	*usedobjsp = BP_GET_FILL(&ds->ds_phys->ds_bp);
1714168404Spjd	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1715168404Spjd}
1716168404Spjd
1717185029Spjdboolean_t
1718253820Sdelphijdsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
1719185029Spjd{
1720185029Spjd	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1721185029Spjd
1722248571Smm	ASSERT(dsl_pool_config_held(dp));
1723253820Sdelphij	if (snap == NULL)
1724185029Spjd		return (B_FALSE);
1725185029Spjd	if (ds->ds_phys->ds_bp.blk_birth >
1726253820Sdelphij	    snap->ds_phys->ds_creation_txg) {
1727253820Sdelphij		objset_t *os, *os_snap;
1728219089Spjd		/*
1729219089Spjd		 * It may be that only the ZIL differs, because it was
1730219089Spjd		 * reset in the head.  Don't count that as being
1731219089Spjd		 * modified.
1732219089Spjd		 */
1733219089Spjd		if (dmu_objset_from_ds(ds, &os) != 0)
1734219089Spjd			return (B_TRUE);
1735253820Sdelphij		if (dmu_objset_from_ds(snap, &os_snap) != 0)
1736219089Spjd			return (B_TRUE);
1737219089Spjd		return (bcmp(&os->os_phys->os_meta_dnode,
1738253820Sdelphij		    &os_snap->os_phys->os_meta_dnode,
1739219089Spjd		    sizeof (os->os_phys->os_meta_dnode)) != 0);
1740219089Spjd	}
1741185029Spjd	return (B_FALSE);
1742185029Spjd}
1743185029Spjd
/*
 * Argument block passed to the snapshot-rename check and sync callbacks.
 */
1744248571Smmtypedef struct dsl_dataset_rename_snapshot_arg {
1745248571Smm	const char *ddrsa_fsname;	/* dataset named by the caller */
1746248571Smm	const char *ddrsa_oldsnapname;	/* current snapshot name */
1747248571Smm	const char *ddrsa_newsnapname;	/* snapshot name to rename to */
1748248571Smm	boolean_t ddrsa_recursive;	/* also visit child datasets */
1749248571Smm	dmu_tx_t *ddrsa_tx;		/* filled in by the sync callback */
1750248571Smm} dsl_dataset_rename_snapshot_arg_t;
1751248571Smm
1752168404Spjd/* ARGSUSED */
1753168404Spjdstatic int
1754248571Smmdsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
1755248571Smm    dsl_dataset_t *hds, void *arg)
1756168404Spjd{
1757248571Smm	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
1758248571Smm	int error;
1759168404Spjd	uint64_t val;
1760168404Spjd
1761248571Smm	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
1762248571Smm	if (error != 0) {
1763248571Smm		/* ignore nonexistent snapshots */
1764248571Smm		return (error == ENOENT ? 0 : error);
1765248571Smm	}
1766168404Spjd
1767248571Smm	/* new name should not exist */
1768248571Smm	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);
1769248571Smm	if (error == 0)
1770249195Smm		error = SET_ERROR(EEXIST);
1771248571Smm	else if (error == ENOENT)
1772248571Smm		error = 0;
1773168404Spjd
1774168676Spjd	/* dataset name + 1 for the "@" + the new snapshot name must fit */
1775248571Smm	if (dsl_dir_namelen(hds->ds_dir) + 1 +
1776248571Smm	    strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
1777249195Smm		error = SET_ERROR(ENAMETOOLONG);
1778168676Spjd
1779248571Smm	return (error);
1780168404Spjd}
1781168404Spjd
1782248571Smmstatic int
1783248571Smmdsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
1784168404Spjd{
1785248571Smm	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
1786248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1787168404Spjd	dsl_dataset_t *hds;
1788248571Smm	int error;
1789168404Spjd
1790248571Smm	error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);
1791248571Smm	if (error != 0)
1792248571Smm		return (error);
1793168404Spjd
1794248571Smm	if (ddrsa->ddrsa_recursive) {
1795248571Smm		error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
1796248571Smm		    dsl_dataset_rename_snapshot_check_impl, ddrsa,
1797248571Smm		    DS_FIND_CHILDREN);
1798248571Smm	} else {
1799248571Smm		error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);
1800248571Smm	}
1801248571Smm	dsl_dataset_rele(hds, FTAG);
1802248571Smm	return (error);
1803248571Smm}
1804168404Spjd
1805248571Smmstatic int
1806248571Smmdsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
1807248571Smm    dsl_dataset_t *hds, void *arg)
1808248571Smm{
1809248571Smm#ifdef __FreeBSD__
1810248571Smm#ifdef _KERNEL
1811248571Smm	char *oldname, *newname;
1812248571Smm#endif
1813248571Smm#endif
1814248571Smm	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
1815248571Smm	dsl_dataset_t *ds;
1816248571Smm	uint64_t val;
1817248571Smm	dmu_tx_t *tx = ddrsa->ddrsa_tx;
1818248571Smm	int error;
1819248571Smm
1820248571Smm	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
1821248571Smm	ASSERT(error == 0 || error == ENOENT);
1822248571Smm	if (error == ENOENT) {
1823248571Smm		/* ignore nonexistent snapshots */
1824248571Smm		return (0);
1825248571Smm	}
1826248571Smm
1827248571Smm	VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));
1828248571Smm
1829248571Smm	/* log before we change the name */
1830248571Smm	spa_history_log_internal_ds(ds, "rename", tx,
1831248571Smm	    "-> @%s", ddrsa->ddrsa_newsnapname);
1832248571Smm
1833265744Sdelphij	VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,
1834265744Sdelphij	    B_FALSE));
1835168404Spjd	mutex_enter(&ds->ds_lock);
1836248571Smm	(void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
1837168404Spjd	mutex_exit(&ds->ds_lock);
1838248571Smm	VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj,
1839248571Smm	    ds->ds_snapname, 8, 1, &ds->ds_object, tx));
1840248571Smm
1841248571Smm#ifdef __FreeBSD__
1842219320Spjd#ifdef _KERNEL
1843248571Smm	oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1844248571Smm	newname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1845248571Smm	snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
1846248571Smm	    ddrsa->ddrsa_oldsnapname);
1847248571Smm	snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
1848248571Smm	    ddrsa->ddrsa_newsnapname);
1849248571Smm	zfsvfs_update_fromname(oldname, newname);
1850219317Spjd	zvol_rename_minors(oldname, newname);
1851248571Smm	kmem_free(newname, MAXPATHLEN);
1852248571Smm	kmem_free(oldname, MAXPATHLEN);
1853219320Spjd#endif
1854248571Smm#endif
1855248571Smm	dsl_dataset_rele(ds, FTAG);
1856168404Spjd
1857248571Smm	return (0);
1858168404Spjd}
1859168404Spjd
1860248571Smmstatic void
1861248571Smmdsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
1862168676Spjd{
1863248571Smm	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
1864248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1865248571Smm	dsl_dataset_t *hds;
1866168676Spjd
1867248571Smm	VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
1868248571Smm	ddrsa->ddrsa_tx = tx;
1869248571Smm	if (ddrsa->ddrsa_recursive) {
1870248571Smm		VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
1871248571Smm		    dsl_dataset_rename_snapshot_sync_impl, ddrsa,
1872248571Smm		    DS_FIND_CHILDREN));
1873248571Smm	} else {
1874248571Smm		VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));
1875168676Spjd	}
1876248571Smm	dsl_dataset_rele(hds, FTAG);
1877248571Smm}
1878168676Spjd
1879248571Smmint
1880248571Smmdsl_dataset_rename_snapshot(const char *fsname,
1881248571Smm    const char *oldsnapname, const char *newsnapname, boolean_t recursive)
1882248571Smm{
1883248571Smm	dsl_dataset_rename_snapshot_arg_t ddrsa;
1884168676Spjd
1885248571Smm	ddrsa.ddrsa_fsname = fsname;
1886248571Smm	ddrsa.ddrsa_oldsnapname = oldsnapname;
1887248571Smm	ddrsa.ddrsa_newsnapname = newsnapname;
1888248571Smm	ddrsa.ddrsa_recursive = recursive;
1889168676Spjd
1890248571Smm	return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
1891269006Sdelphij	    dsl_dataset_rename_snapshot_sync, &ddrsa,
1892269006Sdelphij	    1, ZFS_SPACE_CHECK_RESERVED));
1893168676Spjd}
1894168676Spjd
1895253816Sdelphij/*
1896253816Sdelphij * If we're doing an ownership handoff, we need to make sure that there is
1897253816Sdelphij * only one long hold on the dataset.  We're not allowed to change anything here
1898253816Sdelphij * so we don't permanently release the long hold or regular hold here.  We want
1899253816Sdelphij * to do this only when syncing to avoid the dataset unexpectedly going away
1900253816Sdelphij * when we release the long hold.
1901253816Sdelphij */
1902168676Spjdstatic int
1903253816Sdelphijdsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
1904253816Sdelphij{
1905253816Sdelphij	boolean_t held;
1906253816Sdelphij
1907253816Sdelphij	if (!dmu_tx_is_syncing(tx))
1908253816Sdelphij		return (0);
1909253816Sdelphij
1910253816Sdelphij	if (owner != NULL) {
1911253816Sdelphij		VERIFY3P(ds->ds_owner, ==, owner);
1912253816Sdelphij		dsl_dataset_long_rele(ds, owner);
1913253816Sdelphij	}
1914253816Sdelphij
1915253816Sdelphij	held = dsl_dataset_long_held(ds);
1916253816Sdelphij
1917253816Sdelphij	if (owner != NULL)
1918253816Sdelphij		dsl_dataset_long_hold(ds, owner);
1919253816Sdelphij
1920253816Sdelphij	if (held)
1921253816Sdelphij		return (SET_ERROR(EBUSY));
1922253816Sdelphij
1923253816Sdelphij	return (0);
1924253816Sdelphij}
1925253816Sdelphij
/*
 * Argument block passed to the rollback check and sync callbacks.
 */
1926253816Sdelphijtypedef struct dsl_dataset_rollback_arg {
1927253816Sdelphij	const char *ddra_fsname;	/* dataset to roll back */
1928253816Sdelphij	void *ddra_owner;		/* owner for the handoff check, or NULL */
1929254587Sdelphij	nvlist_t *ddra_result;		/* receives the "target" snapshot name */
1930253816Sdelphij} dsl_dataset_rollback_arg_t;
1931253816Sdelphij
1932253816Sdelphijstatic int
1933248571Smmdsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
1934168676Spjd{
1935253816Sdelphij	dsl_dataset_rollback_arg_t *ddra = arg;
1936248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
1937248571Smm	dsl_dataset_t *ds;
1938248571Smm	int64_t unused_refres_delta;
1939248571Smm	int error;
1940168676Spjd
1941253816Sdelphij	error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
1942248571Smm	if (error != 0)
1943248571Smm		return (error);
1944168676Spjd
1945248571Smm	/* must not be a snapshot */
1946248571Smm	if (dsl_dataset_is_snapshot(ds)) {
1947248571Smm		dsl_dataset_rele(ds, FTAG);
1948249195Smm		return (SET_ERROR(EINVAL));
1949168676Spjd	}
1950168676Spjd
1951248571Smm	/* must have a most recent snapshot */
1952248571Smm	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
1953248571Smm		dsl_dataset_rele(ds, FTAG);
1954249195Smm		return (SET_ERROR(EINVAL));
1955248571Smm	}
1956168676Spjd
1957263407Sdelphij	/* must not have any bookmarks after the most recent snapshot */
1958263407Sdelphij	nvlist_t *proprequest = fnvlist_alloc();
1959263407Sdelphij	fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
1960263407Sdelphij	nvlist_t *bookmarks = fnvlist_alloc();
1961263407Sdelphij	error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
1962263407Sdelphij	fnvlist_free(proprequest);
1963263407Sdelphij	if (error != 0)
1964263407Sdelphij		return (error);
1965263407Sdelphij	for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
1966263407Sdelphij	    pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
1967263407Sdelphij		nvlist_t *valuenv =
1968263407Sdelphij		    fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair),
1969263407Sdelphij		    zfs_prop_to_name(ZFS_PROP_CREATETXG));
1970263407Sdelphij		uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value");
1971263407Sdelphij		if (createtxg > ds->ds_phys->ds_prev_snap_txg) {
1972263407Sdelphij			fnvlist_free(bookmarks);
1973263407Sdelphij			dsl_dataset_rele(ds, FTAG);
1974263407Sdelphij			return (SET_ERROR(EEXIST));
1975263407Sdelphij		}
1976263407Sdelphij	}
1977263407Sdelphij	fnvlist_free(bookmarks);
1978263407Sdelphij
1979253816Sdelphij	error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
1980253816Sdelphij	if (error != 0) {
1981248571Smm		dsl_dataset_rele(ds, FTAG);
1982253816Sdelphij		return (error);
1983248571Smm	}
1984168676Spjd
1985248571Smm	/*
1986248571Smm	 * Check if the snap we are rolling back to uses more than
1987248571Smm	 * the refquota.
1988248571Smm	 */
1989248571Smm	if (ds->ds_quota != 0 &&
1990248571Smm	    ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) {
1991248571Smm		dsl_dataset_rele(ds, FTAG);
1992249195Smm		return (SET_ERROR(EDQUOT));
1993168676Spjd	}
1994168676Spjd
1995248571Smm	/*
1996248571Smm	 * When we do the clone swap, we will temporarily use more space
1997248571Smm	 * due to the refreservation (the head will no longer have any
1998248571Smm	 * unique space, so the entire amount of the refreservation will need
1999248571Smm	 * to be free).  We will immediately destroy the clone, freeing
2000248571Smm	 * this space, but the freeing happens over many txg's.
2001248571Smm	 */
2002248571Smm	unused_refres_delta = (int64_t)MIN(ds->ds_reserved,
2003248571Smm	    ds->ds_phys->ds_unique_bytes);
2004168676Spjd
2005248571Smm	if (unused_refres_delta > 0 &&
2006248571Smm	    unused_refres_delta >
2007248571Smm	    dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) {
2008248571Smm		dsl_dataset_rele(ds, FTAG);
2009249195Smm		return (SET_ERROR(ENOSPC));
2010248571Smm	}
2011168676Spjd
2012248571Smm	dsl_dataset_rele(ds, FTAG);
2013185029Spjd	return (0);
2014185029Spjd}
2015185029Spjd
/*
 * Sync callback for dsl_dataset_rollback().
 *
 * Records the full name of the previous snapshot under "target" in the
 * result nvlist, creates a temporary "%rollback" dataset from that
 * snapshot, swaps its contents with the head, zeroes the head's ZIL and
 * finally destroys the temporary dataset.
 */
2016248571Smmstatic void
2017248571Smmdsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
2018168404Spjd{
2019253816Sdelphij	dsl_dataset_rollback_arg_t *ddra = arg;
2020248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2021248571Smm	dsl_dataset_t *ds, *clone;
2022248571Smm	uint64_t cloneobj;
2023254587Sdelphij	char namebuf[ZFS_MAXNAMELEN];
2024168404Spjd
2025253816Sdelphij	VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));
2026219089Spjd
	/* report which snapshot we are rolling back to */
2027254587Sdelphij	dsl_dataset_name(ds->ds_prev, namebuf);
2028254587Sdelphij	fnvlist_add_string(ddra->ddra_result, "target", namebuf);
2029254587Sdelphij
	/* temporary dataset created from the previous snapshot */
2030248571Smm	cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
2031248571Smm	    ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
2032185029Spjd
2033248571Smm	VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));
2034185029Spjd
2035248571Smm	dsl_dataset_clone_swap_sync_impl(clone, ds, tx);
2036248571Smm	dsl_dataset_zero_zil(ds, tx);
2037219089Spjd
2038248571Smm	dsl_destroy_head_sync_impl(clone, tx);
2039168404Spjd
2040248571Smm	dsl_dataset_rele(clone, FTAG);
2041248571Smm	dsl_dataset_rele(ds, FTAG);
2042248571Smm}
2043168404Spjd
2044253816Sdelphij/*
2045254587Sdelphij * Rolls back the given filesystem or volume to the most recent snapshot.
2046254587Sdelphij * The name of the most recent snapshot will be returned under key "target"
2047254587Sdelphij * in the result nvlist.
2048254587Sdelphij *
2049253816Sdelphij * If owner != NULL:
2050253816Sdelphij * - The existing dataset MUST be owned by the specified owner at entry
2051253816Sdelphij * - Upon return, dataset will still be held by the same owner, whether we
2052253816Sdelphij *   succeed or not.
2053253816Sdelphij *
2054253816Sdelphij * This mode is required any time the existing filesystem is mounted.  See
2055253816Sdelphij * notes above zfs_suspend_fs() for further details.
2056253816Sdelphij */
2057248571Smmint
2058254587Sdelphijdsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result)
2059248571Smm{
2060253816Sdelphij	dsl_dataset_rollback_arg_t ddra;
2061253816Sdelphij
2062253816Sdelphij	ddra.ddra_fsname = fsname;
2063253816Sdelphij	ddra.ddra_owner = owner;
2064254587Sdelphij	ddra.ddra_result = result;
2065253816Sdelphij
2066248571Smm	return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
2067269006Sdelphij	    dsl_dataset_rollback_sync, &ddra,
2068269006Sdelphij	    1, ZFS_SPACE_CHECK_RESERVED));
2069168404Spjd}
2070168404Spjd
/*
 * Element of the snapshot lists kept in dsl_dataset_promote_arg_t
 * (shared_snaps, origin_snaps, clone_snaps); each node refers to one
 * dataset.
 */
2071185029Spjdstruct promotenode {
2072185029Spjd	list_node_t link;
2073185029Spjd	dsl_dataset_t *ds;
2074185029Spjd};
2075185029Spjd
/*
 * Argument block passed to the promote check and sync callbacks.
 *
 * used/comp/uncomp, unique, cloneusedsnap and originusedsnap are computed
 * by the check callback in syncing context.  err_ds receives the name of a
 * conflicting snapshot when the check fails with EEXIST, and cr is the
 * credential handed to dsl_dir_transfer_possible().
 */
2076248571Smmtypedef struct dsl_dataset_promote_arg {
2077248571Smm	const char *ddpa_clonename;
2078248571Smm	dsl_dataset_t *ddpa_clone;
2079185029Spjd	list_t shared_snaps, origin_snaps, clone_snaps;
2080248571Smm	dsl_dataset_t *origin_origin; /* origin of the origin */
2081185029Spjd	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
2082219089Spjd	char *err_ds;
2083265744Sdelphij	cred_t *cr;
2084248571Smm} dsl_dataset_promote_arg_t;
2085168404Spjd
/*
 * Prototypes for static helpers used by the promote callbacks below; their
 * definitions are not in this part of the file.
 */
2086185029Spjdstatic int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2087248571Smmstatic int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,
2088248571Smm    void *tag);
2089248571Smmstatic void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag);
2090185029Spjd
/*
 * Check callback for promotion of a clone.
 *
 * The holds needed for the check are taken with promote_hold() and dropped
 * with promote_rele() on every return path.  The check fails with EXDEV if
 * the clone has DS_FLAG_NOPROMOTE set.  In open context nothing further is
 * verified; the space computation is only done when syncing.
 *
 * In syncing context this:
 * - computes the origin's new unique space (ddpa->unique);
 * - walks the snapshots being moved, failing with EBUSY if one of them has
 *   a long hold and with EEXIST (the offending name is copied to
 *   ddpa->err_ds) if the clone already has a snapshot of the same name,
 *   while accumulating the space to transfer in ddpa->used/comp/uncomp;
 * - asks dsl_dir_transfer_possible() whether the space and the number of
 *   moved snapshots can be accommodated;
 * - computes ddpa->cloneusedsnap and ddpa->originusedsnap for dsl_dirs that
 *   have DD_FLAG_USED_BREAKDOWN set.
 */
2091168404Spjdstatic int
2092248571Smmdsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
2093168404Spjd{
2094248571Smm	dsl_dataset_promote_arg_t *ddpa = arg;
2095248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2096248571Smm	dsl_dataset_t *hds;
2097248571Smm	struct promotenode *snap;
2098248571Smm	dsl_dataset_t *origin_ds;
2099168404Spjd	int err;
2100219089Spjd	uint64_t unused;
2101265744Sdelphij	uint64_t ss_mv_cnt;
2102168404Spjd
2103248571Smm	err = promote_hold(ddpa, dp, FTAG);
2104248571Smm	if (err != 0)
2105248571Smm		return (err);
2106168404Spjd
2107248571Smm	hds = ddpa->ddpa_clone;
2108168404Spjd
2109248571Smm	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
2110248571Smm		promote_rele(ddpa, FTAG);
2111249195Smm		return (SET_ERROR(EXDEV));
2112248571Smm	}
2113168404Spjd
2114248571Smm	/*
2115248571Smm	 * Compute and check the amount of space to transfer.  Since this is
2116248571Smm	 * so expensive, don't do the preliminary check.
2117248571Smm	 */
2118248571Smm	if (!dmu_tx_is_syncing(tx)) {
2119248571Smm		promote_rele(ddpa, FTAG);
2120248571Smm		return (0);
2121248571Smm	}
2122248571Smm
2123248571Smm	snap = list_head(&ddpa->shared_snaps);
2124248571Smm	origin_ds = snap->ds;
2125248571Smm
2126185029Spjd	/* compute origin's new unique space */
2127248571Smm	snap = list_tail(&ddpa->clone_snaps);
2128185029Spjd	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
2129219089Spjd	dsl_deadlist_space_range(&snap->ds->ds_deadlist,
2130219089Spjd	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
2131248571Smm	    &ddpa->unique, &unused, &unused);
2132168404Spjd
2133185029Spjd	/*
2134185029Spjd	 * Walk the snapshots that we are moving
2135185029Spjd	 *
2136185029Spjd	 * Compute space to transfer.  Consider the incremental changes
2137248571Smm	 * to used by each snapshot:
2138185029Spjd	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
2139185029Spjd	 * So each snapshot gave birth to:
2140185029Spjd	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
2141185029Spjd	 * So a sequence would look like:
2142185029Spjd	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
2143185029Spjd	 * Which simplifies to:
2144185029Spjd	 * uN + kN + kN-1 + ... + k1 + k0
2145185029Spjd	 * Note however, if we stop before we reach the ORIGIN we get:
2146185029Spjd	 * uN + kN + kN-1 + ... + kM - uM-1
2147185029Spjd	 */
2148265744Sdelphij	ss_mv_cnt = 0;
2149248571Smm	ddpa->used = origin_ds->ds_phys->ds_referenced_bytes;
2150248571Smm	ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes;
2151248571Smm	ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
2152248571Smm	for (snap = list_head(&ddpa->shared_snaps); snap;
2153248571Smm	    snap = list_next(&ddpa->shared_snaps, snap)) {
2154185029Spjd		uint64_t val, dlused, dlcomp, dluncomp;
2155185029Spjd		dsl_dataset_t *ds = snap->ds;
2156168404Spjd
2157265744Sdelphij		ss_mv_cnt++;
2158265744Sdelphij
2159248571Smm		/*
2160248571Smm		 * If there are long holds, we won't be able to evict
2161248571Smm		 * the objset.
2162248571Smm		 */
2163248571Smm		if (dsl_dataset_long_held(ds)) {
2164249195Smm			err = SET_ERROR(EBUSY);
2165248571Smm			goto out;
2166248571Smm		}
2167248571Smm
2168185029Spjd		/* Check that the snapshot name does not conflict */
2169248571Smm		VERIFY0(dsl_dataset_get_snapname(ds));
2170185029Spjd		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
2171219089Spjd		if (err == 0) {
2172248571Smm			(void) strcpy(ddpa->err_ds, snap->ds->ds_snapname);
2173249195Smm			err = SET_ERROR(EEXIST);
2174219089Spjd			goto out;
2175219089Spjd		}
2176185029Spjd		if (err != ENOENT)
2177219089Spjd			goto out;
2178168404Spjd
2179185029Spjd		/* The very first snapshot does not have a deadlist */
2180185029Spjd		if (ds->ds_phys->ds_prev_snap_obj == 0)
2181185029Spjd			continue;
2182185029Spjd
2183219089Spjd		dsl_deadlist_space(&ds->ds_deadlist,
2184219089Spjd		    &dlused, &dlcomp, &dluncomp);
2185248571Smm		ddpa->used += dlused;
2186248571Smm		ddpa->comp += dlcomp;
2187248571Smm		ddpa->uncomp += dluncomp;
2188168404Spjd	}
2189168404Spjd
2190185029Spjd	/*
2191185029Spjd	 * If we are a clone of a clone then we never reached ORIGIN,
2192185029Spjd	 * so we need to subtract out the clone origin's used space.
2193185029Spjd	 */
2194248571Smm	if (ddpa->origin_origin) {
2195248571Smm		ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes;
2196248571Smm		ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes;
2197248571Smm		ddpa->uncomp -=
2198248571Smm		    ddpa->origin_origin->ds_phys->ds_uncompressed_bytes;
2199168404Spjd	}
2200168404Spjd
2201265744Sdelphij	/* Check that there is enough space and limit headroom here */
2202185029Spjd	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2203265744Sdelphij	    0, ss_mv_cnt, ddpa->used, ddpa->cr);
2204248571Smm	if (err != 0)
2205248571Smm		goto out;
2206168404Spjd
2207185029Spjd	/*
2208185029Spjd	 * Compute the amounts of space that will be used by snapshots
2209185029Spjd	 * after the promotion (for both origin and clone).  For each,
2210185029Spjd	 * it is the amount of space that will be on all of their
2211185029Spjd	 * deadlists (that was not born before their new origin).
2212185029Spjd	 */
2213185029Spjd	if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2214185029Spjd		uint64_t space;
2215168404Spjd
2216168404Spjd		/*
2217185029Spjd		 * Note, typically this will not be a clone of a clone,
2218219089Spjd		 * so dd_origin_txg will be < TXG_INITIAL, so
2219219089Spjd		 * these snaplist_space() -> dsl_deadlist_space_range()
2220185029Spjd		 * calls will be fast because they do not have to
2221185029Spjd		 * iterate over all bps.
2222168404Spjd		 */
2223248571Smm		snap = list_head(&ddpa->origin_snaps);
2224248571Smm		err = snaplist_space(&ddpa->shared_snaps,
2225248571Smm		    snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);
2226248571Smm		if (err != 0)
2227248571Smm			goto out;
2228168404Spjd
2229248571Smm		err = snaplist_space(&ddpa->clone_snaps,
2230219089Spjd		    snap->ds->ds_dir->dd_origin_txg, &space);
2231248571Smm		if (err != 0)
2232248571Smm			goto out;
2233248571Smm		ddpa->cloneusedsnap += space;
2234168404Spjd	}
2235185029Spjd	if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2236248571Smm		err = snaplist_space(&ddpa->origin_snaps,
2237248571Smm		    origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap);
2238248571Smm		if (err != 0)
2239248571Smm			goto out;
2240185029Spjd	}
2241168404Spjd
2242219089Spjdout:
2243248571Smm	promote_rele(ddpa, FTAG);
2244219089Spjd	return (err);
2245168404Spjd}
2246168404Spjd
2247168404Spjdstatic void
2248248571Smmdsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
2249168404Spjd{
2250248571Smm	dsl_dataset_promote_arg_t *ddpa = arg;
2251248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2252248571Smm	dsl_dataset_t *hds;
2253248571Smm	struct promotenode *snap;
2254248571Smm	dsl_dataset_t *origin_ds;
2255185029Spjd	dsl_dataset_t *origin_head;
2256248571Smm	dsl_dir_t *dd;
2257185029Spjd	dsl_dir_t *odd = NULL;
2258185029Spjd	uint64_t oldnext_obj;
2259185029Spjd	int64_t delta;
2260273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL)
2261273162Ssmh	char *oldname, *newname;
2262273162Ssmh#endif
2263168404Spjd
2264248571Smm	VERIFY0(promote_hold(ddpa, dp, FTAG));
2265248571Smm	hds = ddpa->ddpa_clone;
2266168404Spjd
2267248571Smm	ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE);
2268248571Smm
2269248571Smm	snap = list_head(&ddpa->shared_snaps);
2270248571Smm	origin_ds = snap->ds;
2271248571Smm	dd = hds->ds_dir;
2272248571Smm
2273248571Smm	snap = list_head(&ddpa->origin_snaps);
2274185029Spjd	origin_head = snap->ds;
2275185029Spjd
2276168404Spjd	/*
2277185029Spjd	 * We need to explicitly open odd, since origin_ds's dd will be
2278168404Spjd	 * changing.
2279168404Spjd	 */
2280248571Smm	VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
2281185029Spjd	    NULL, FTAG, &odd));
2282168404Spjd
2283185029Spjd	/* change origin's next snap */
2284185029Spjd	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
2285185029Spjd	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
2286248571Smm	snap = list_tail(&ddpa->clone_snaps);
2287185029Spjd	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
2288185029Spjd	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
2289185029Spjd
2290185029Spjd	/* change the origin's next clone */
2291185029Spjd	if (origin_ds->ds_phys->ds_next_clones_obj) {
2292248571Smm		dsl_dataset_remove_from_next_clones(origin_ds,
2293248571Smm		    snap->ds->ds_object, tx);
2294248571Smm		VERIFY0(zap_add_int(dp->dp_meta_objset,
2295185029Spjd		    origin_ds->ds_phys->ds_next_clones_obj,
2296185029Spjd		    oldnext_obj, tx));
2297185029Spjd	}
2298185029Spjd
2299185029Spjd	/* change origin */
2300185029Spjd	dmu_buf_will_dirty(dd->dd_dbuf, tx);
2301185029Spjd	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
2302185029Spjd	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
2303219089Spjd	dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;
2304185029Spjd	dmu_buf_will_dirty(odd->dd_dbuf, tx);
2305185029Spjd	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
2306219089Spjd	origin_head->ds_dir->dd_origin_txg =
2307219089Spjd	    origin_ds->ds_phys->ds_creation_txg;
2308185029Spjd
2309219089Spjd	/* change dd_clone entries */
2310219089Spjd	if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2311248571Smm		VERIFY0(zap_remove_int(dp->dp_meta_objset,
2312219089Spjd		    odd->dd_phys->dd_clones, hds->ds_object, tx));
2313248571Smm		VERIFY0(zap_add_int(dp->dp_meta_objset,
2314248571Smm		    ddpa->origin_origin->ds_dir->dd_phys->dd_clones,
2315219089Spjd		    hds->ds_object, tx));
2316219089Spjd
2317248571Smm		VERIFY0(zap_remove_int(dp->dp_meta_objset,
2318248571Smm		    ddpa->origin_origin->ds_dir->dd_phys->dd_clones,
2319219089Spjd		    origin_head->ds_object, tx));
2320219089Spjd		if (dd->dd_phys->dd_clones == 0) {
2321219089Spjd			dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset,
2322219089Spjd			    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
2323219089Spjd		}
2324248571Smm		VERIFY0(zap_add_int(dp->dp_meta_objset,
2325219089Spjd		    dd->dd_phys->dd_clones, origin_head->ds_object, tx));
2326219089Spjd	}
2327219089Spjd
2328273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL)
2329273162Ssmh	/* Take the spa_namespace_lock early so zvol renames don't deadlock. */
2330273162Ssmh	mutex_enter(&spa_namespace_lock);
2331273162Ssmh
2332273162Ssmh	oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2333273162Ssmh	newname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2334273162Ssmh#endif
2335273162Ssmh
2336168404Spjd	/* move snapshots to this dir */
2337248571Smm	for (snap = list_head(&ddpa->shared_snaps); snap;
2338248571Smm	    snap = list_next(&ddpa->shared_snaps, snap)) {
2339185029Spjd		dsl_dataset_t *ds = snap->ds;
2340168404Spjd
2341248571Smm		/*
2342248571Smm		 * Property callbacks are registered to a particular
2343248571Smm		 * dsl_dir.  Since ours is changing, evict the objset
2344248571Smm		 * so that they will be unregistered from the old dsl_dir.
2345248571Smm		 */
2346219089Spjd		if (ds->ds_objset) {
2347219089Spjd			dmu_objset_evict(ds->ds_objset);
2348219089Spjd			ds->ds_objset = NULL;
2349185029Spjd		}
2350248571Smm
2351168404Spjd		/* move snap name entry */
2352248571Smm		VERIFY0(dsl_dataset_get_snapname(ds));
2353248571Smm		VERIFY0(dsl_dataset_snap_remove(origin_head,
2354265744Sdelphij		    ds->ds_snapname, tx, B_TRUE));
2355248571Smm		VERIFY0(zap_add(dp->dp_meta_objset,
2356168404Spjd		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2357168404Spjd		    8, 1, &ds->ds_object, tx));
2358265744Sdelphij		dsl_fs_ss_count_adjust(hds->ds_dir, 1,
2359265744Sdelphij		    DD_FIELD_SNAPSHOT_COUNT, tx);
2360219089Spjd
2361168404Spjd		/* change containing dsl_dir */
2362168404Spjd		dmu_buf_will_dirty(ds->ds_dbuf, tx);
2363185029Spjd		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2364168404Spjd		ds->ds_phys->ds_dir_obj = dd->dd_object;
2365185029Spjd		ASSERT3P(ds->ds_dir, ==, odd);
2366248571Smm		dsl_dir_rele(ds->ds_dir, ds);
2367248571Smm		VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object,
2368168404Spjd		    NULL, ds, &ds->ds_dir));
2369168404Spjd
2370273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL)
2371273162Ssmh		dsl_dataset_name(ds, newname);
2372273162Ssmh		zfsvfs_update_fromname(oldname, newname);
2373273162Ssmh		zvol_rename_minors(oldname, newname);
2374273162Ssmh#endif
2375273162Ssmh
2376219089Spjd		/* move any clone references */
2377219089Spjd		if (ds->ds_phys->ds_next_clones_obj &&
2378219089Spjd		    spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2379219089Spjd			zap_cursor_t zc;
2380219089Spjd			zap_attribute_t za;
2381219089Spjd
2382219089Spjd			for (zap_cursor_init(&zc, dp->dp_meta_objset,
2383219089Spjd			    ds->ds_phys->ds_next_clones_obj);
2384219089Spjd			    zap_cursor_retrieve(&zc, &za) == 0;
2385219089Spjd			    zap_cursor_advance(&zc)) {
2386219089Spjd				dsl_dataset_t *cnds;
2387219089Spjd				uint64_t o;
2388219089Spjd
2389219089Spjd				if (za.za_first_integer == oldnext_obj) {
2390219089Spjd					/*
2391219089Spjd					 * We've already moved the
2392219089Spjd					 * origin's reference.
2393219089Spjd					 */
2394219089Spjd					continue;
2395219089Spjd				}
2396219089Spjd
2397248571Smm				VERIFY0(dsl_dataset_hold_obj(dp,
2398219089Spjd				    za.za_first_integer, FTAG, &cnds));
2399219089Spjd				o = cnds->ds_dir->dd_phys->dd_head_dataset_obj;
2400219089Spjd
2401248571Smm				VERIFY0(zap_remove_int(dp->dp_meta_objset,
2402248571Smm				    odd->dd_phys->dd_clones, o, tx));
2403248571Smm				VERIFY0(zap_add_int(dp->dp_meta_objset,
2404248571Smm				    dd->dd_phys->dd_clones, o, tx));
2405219089Spjd				dsl_dataset_rele(cnds, FTAG);
2406219089Spjd			}
2407219089Spjd			zap_cursor_fini(&zc);
2408219089Spjd		}
2409219089Spjd
2410248571Smm		ASSERT(!dsl_prop_hascb(ds));
2411185029Spjd	}
2412168404Spjd
2413273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL)
2414273162Ssmh	mutex_exit(&spa_namespace_lock);
2415273162Ssmh
2416273162Ssmh	kmem_free(newname, MAXPATHLEN);
2417273162Ssmh	kmem_free(oldname, MAXPATHLEN);
2418273162Ssmh#endif
2419185029Spjd	/*
2420185029Spjd	 * Change space accounting.
2421185029Spjd	 * Note, ddpa->*usedsnap and dd_used_breakdown[SNAP] will either
2422185029Spjd	 * both be valid, or both be 0 (resulting in delta == 0).  This
2423185029Spjd	 * is true for each of {clone,origin} independently.
2424185029Spjd	 */
2425168404Spjd
2426248571Smm	delta = ddpa->cloneusedsnap -
2427185029Spjd	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
2428185029Spjd	ASSERT3S(delta, >=, 0);
2429248571Smm	ASSERT3U(ddpa->used, >=, delta);
2430185029Spjd	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
2431185029Spjd	dsl_dir_diduse_space(dd, DD_USED_HEAD,
2432248571Smm	    ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx);
2433168404Spjd
2434248571Smm	delta = ddpa->originusedsnap -
2435185029Spjd	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
2436185029Spjd	ASSERT3S(delta, <=, 0);
2437248571Smm	ASSERT3U(ddpa->used, >=, -delta);
2438185029Spjd	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
2439185029Spjd	dsl_dir_diduse_space(odd, DD_USED_HEAD,
2440248571Smm	    -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx);
2441185029Spjd
2442248571Smm	origin_ds->ds_phys->ds_unique_bytes = ddpa->unique;
2443185029Spjd
2444185029Spjd	/* log history record */
2445248571Smm	spa_history_log_internal_ds(hds, "promote", tx, "");
2446185029Spjd
2447248571Smm	dsl_dir_rele(odd, FTAG);
2448248571Smm	promote_rele(ddpa, FTAG);
2449185029Spjd}
2450185029Spjd
2451185029Spjd/*
2452185029Spjd * Make a list of dsl_dataset_t's for the snapshots between first_obj
2453185029Spjd * (exclusive) and last_obj (inclusive).  The list will be in reverse
2454185029Spjd * order (last_obj will be the list_head()).  If first_obj == 0, do all
2455185029Spjd * snapshots back to this dataset's origin.
2456185029Spjd */
2457185029Spjdstatic int
2458248571Smmsnaplist_make(dsl_pool_t *dp,
2459248571Smm    uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag)
2460185029Spjd{
2461185029Spjd	uint64_t obj = last_obj;
2462185029Spjd
2463185029Spjd	list_create(l, sizeof (struct promotenode),
2464185029Spjd	    offsetof(struct promotenode, link));
2465185029Spjd
2466185029Spjd	while (obj != first_obj) {
2467185029Spjd		dsl_dataset_t *ds;
2468185029Spjd		struct promotenode *snap;
2469185029Spjd		int err;
2470185029Spjd
2471248571Smm		err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
2472248571Smm		ASSERT(err != ENOENT);
2473248571Smm		if (err != 0)
2474185029Spjd			return (err);
2475185029Spjd
2476185029Spjd		if (first_obj == 0)
2477185029Spjd			first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
2478185029Spjd
2479248571Smm		snap = kmem_alloc(sizeof (*snap), KM_SLEEP);
2480185029Spjd		snap->ds = ds;
2481185029Spjd		list_insert_tail(l, snap);
2482185029Spjd		obj = ds->ds_phys->ds_prev_snap_obj;
2483168404Spjd	}
2484168404Spjd
2485185029Spjd	return (0);
2486185029Spjd}
2487168404Spjd
2488185029Spjdstatic int
2489185029Spjdsnaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
2490185029Spjd{
2491185029Spjd	struct promotenode *snap;
2492168404Spjd
2493185029Spjd	*spacep = 0;
2494185029Spjd	for (snap = list_head(l); snap; snap = list_next(l, snap)) {
2495219089Spjd		uint64_t used, comp, uncomp;
2496219089Spjd		dsl_deadlist_space_range(&snap->ds->ds_deadlist,
2497219089Spjd		    mintxg, UINT64_MAX, &used, &comp, &uncomp);
2498185029Spjd		*spacep += used;
2499185029Spjd	}
2500185029Spjd	return (0);
2501185029Spjd}
2502168404Spjd
2503185029Spjdstatic void
2504248571Smmsnaplist_destroy(list_t *l, void *tag)
2505185029Spjd{
2506185029Spjd	struct promotenode *snap;
2507185029Spjd
2508248571Smm	if (l == NULL || !list_link_active(&l->list_head))
2509185029Spjd		return;
2510185029Spjd
2511185029Spjd	while ((snap = list_tail(l)) != NULL) {
2512185029Spjd		list_remove(l, snap);
2513248571Smm		dsl_dataset_rele(snap->ds, tag);
2514248571Smm		kmem_free(snap, sizeof (*snap));
2515185029Spjd	}
2516185029Spjd	list_destroy(l);
2517168404Spjd}
2518168404Spjd
2519248571Smmstatic int
2520248571Smmpromote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
2521168404Spjd{
2522248571Smm	int error;
2523185029Spjd	dsl_dir_t *dd;
2524185029Spjd	struct promotenode *snap;
2525168404Spjd
2526248571Smm	error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
2527248571Smm	    &ddpa->ddpa_clone);
2528248571Smm	if (error != 0)
2529248571Smm		return (error);
2530248571Smm	dd = ddpa->ddpa_clone->ds_dir;
2531168404Spjd
2532248571Smm	if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
2533248571Smm	    !dsl_dir_is_clone(dd)) {
2534248571Smm		dsl_dataset_rele(ddpa->ddpa_clone, tag);
2535249195Smm		return (SET_ERROR(EINVAL));
2536185029Spjd	}
2537185029Spjd
2538248571Smm	error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj,
2539248571Smm	    &ddpa->shared_snaps, tag);
2540248571Smm	if (error != 0)
2541185029Spjd		goto out;
2542185029Spjd
2543248571Smm	error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
2544248571Smm	    &ddpa->clone_snaps, tag);
2545248571Smm	if (error != 0)
2546185029Spjd		goto out;
2547185029Spjd
2548248571Smm	snap = list_head(&ddpa->shared_snaps);
2549185029Spjd	ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
2550248571Smm	error = snaplist_make(dp, dd->dd_phys->dd_origin_obj,
2551248571Smm	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj,
2552248571Smm	    &ddpa->origin_snaps, tag);
2553248571Smm	if (error != 0)
2554185029Spjd		goto out;
2555185029Spjd
2556219089Spjd	if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
2557248571Smm		error = dsl_dataset_hold_obj(dp,
2558185029Spjd		    snap->ds->ds_dir->dd_phys->dd_origin_obj,
2559248571Smm		    tag, &ddpa->origin_origin);
2560248571Smm		if (error != 0)
2561185029Spjd			goto out;
2562185029Spjd	}
2563185029Spjdout:
2564248571Smm	if (error != 0)
2565248571Smm		promote_rele(ddpa, tag);
2566248571Smm	return (error);
2567248571Smm}
2568185029Spjd
2569248571Smmstatic void
2570248571Smmpromote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag)
2571248571Smm{
2572248571Smm	snaplist_destroy(&ddpa->shared_snaps, tag);
2573248571Smm	snaplist_destroy(&ddpa->clone_snaps, tag);
2574248571Smm	snaplist_destroy(&ddpa->origin_snaps, tag);
2575248571Smm	if (ddpa->origin_origin != NULL)
2576248571Smm		dsl_dataset_rele(ddpa->origin_origin, tag);
2577248571Smm	dsl_dataset_rele(ddpa->ddpa_clone, tag);
2578248571Smm}
2579248571Smm
2580248571Smm/*
2581248571Smm * Promote a clone.
2582248571Smm *
2583248571Smm * If it fails due to a conflicting snapshot name, "conflsnap" will be filled
2584248571Smm * in with the name.  (It must be at least MAXNAMELEN bytes long.)
2585248571Smm */
2586248571Smmint
2587248571Smmdsl_dataset_promote(const char *name, char *conflsnap)
2588248571Smm{
2589248571Smm	dsl_dataset_promote_arg_t ddpa = { 0 };
2590248571Smm	uint64_t numsnaps;
2591248571Smm	int error;
2592248571Smm	objset_t *os;
2593248571Smm
2594185029Spjd	/*
2595248571Smm	 * We will modify space proportional to the number of
2596248571Smm	 * snapshots.  Compute numsnaps.
2597168404Spjd	 */
2598248571Smm	error = dmu_objset_hold(name, FTAG, &os);
2599248571Smm	if (error != 0)
2600248571Smm		return (error);
2601248571Smm	error = zap_count(dmu_objset_pool(os)->dp_meta_objset,
2602248571Smm	    dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps);
2603248571Smm	dmu_objset_rele(os, FTAG);
2604248571Smm	if (error != 0)
2605248571Smm		return (error);
2606185029Spjd
2607248571Smm	ddpa.ddpa_clonename = name;
2608248571Smm	ddpa.err_ds = conflsnap;
2609265744Sdelphij	ddpa.cr = CRED();
2610248571Smm
2611248571Smm	return (dsl_sync_task(name, dsl_dataset_promote_check,
2612269006Sdelphij	    dsl_dataset_promote_sync, &ddpa,
2613269006Sdelphij	    2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
2614168404Spjd}
2615168404Spjd
2616248571Smmint
2617248571Smmdsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
2618253816Sdelphij    dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
2619185029Spjd{
2620248571Smm	int64_t unused_refres_delta;
2621185029Spjd
2622185029Spjd	/* they should both be heads */
2623248571Smm	if (dsl_dataset_is_snapshot(clone) ||
2624248571Smm	    dsl_dataset_is_snapshot(origin_head))
2625249195Smm		return (SET_ERROR(EINVAL));
2626185029Spjd
2627253820Sdelphij	/* if we are not forcing, the branch point should be just before them */
2628253820Sdelphij	if (!force && clone->ds_prev != origin_head->ds_prev)
2629249195Smm		return (SET_ERROR(EINVAL));
2630185029Spjd
2631248571Smm	/* clone should be the clone (unless they are unrelated) */
2632248571Smm	if (clone->ds_prev != NULL &&
2633248571Smm	    clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
2634253820Sdelphij	    origin_head->ds_dir != clone->ds_prev->ds_dir)
2635249195Smm		return (SET_ERROR(EINVAL));
2636185029Spjd
2637185029Spjd	/* the clone should be a child of the origin */
2638248571Smm	if (clone->ds_dir->dd_parent != origin_head->ds_dir)
2639249195Smm		return (SET_ERROR(EINVAL));
2640185029Spjd
2641248571Smm	/* origin_head shouldn't be modified unless 'force' */
2642253820Sdelphij	if (!force &&
2643253820Sdelphij	    dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
2644249195Smm		return (SET_ERROR(ETXTBSY));
2645185029Spjd
2646248571Smm	/* origin_head should have no long holds (e.g. is not mounted) */
2647253816Sdelphij	if (dsl_dataset_handoff_check(origin_head, owner, tx))
2648249195Smm		return (SET_ERROR(EBUSY));
2649185029Spjd
2650248571Smm	/* check amount of any unconsumed refreservation */
2651248571Smm	unused_refres_delta =
2652248571Smm	    (int64_t)MIN(origin_head->ds_reserved,
2653248571Smm	    origin_head->ds_phys->ds_unique_bytes) -
2654248571Smm	    (int64_t)MIN(origin_head->ds_reserved,
2655248571Smm	    clone->ds_phys->ds_unique_bytes);
2656248571Smm
2657248571Smm	if (unused_refres_delta > 0 &&
2658248571Smm	    unused_refres_delta >
2659248571Smm	    dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))
2660249195Smm		return (SET_ERROR(ENOSPC));
2661185029Spjd
2662248571Smm	/* clone can't be over the head's refquota */
2663248571Smm	if (origin_head->ds_quota != 0 &&
2664248571Smm	    clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota)
2665249195Smm		return (SET_ERROR(EDQUOT));
2666219089Spjd
2667185029Spjd	return (0);
2668185029Spjd}
2669185029Spjd
/*
 * Swap the contents of "clone" and "origin_head" as part of transaction tx.
 *
 * In order, this: evicts any cached objset on either dataset; recomputes
 * the shared previous snapshot's ds_unique_bytes from the clone's deadlist;
 * swaps the two root block pointers; moves the resulting space differences
 * between the two dsl_dirs (and between HEAD and SNAP within origin_head's
 * dir); swaps the ds_*_bytes counters; applies the change in unconsumed
 * refreservation; swaps the deadlists; notifies the scan code; and logs a
 * "clone swap" history record.
 *
 * The asserts below require that the clone has no refreservation, that it
 * fits within origin_head's refquota (if any), and that both datasets have
 * the same previous snapshot.
 */
2670248571Smmvoid
2671248571Smmdsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
2672248571Smm    dsl_dataset_t *origin_head, dmu_tx_t *tx)
2673185029Spjd{
2674248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2675248571Smm	int64_t unused_refres_delta;
2676185029Spjd
2677248571Smm	ASSERT(clone->ds_reserved == 0);
2678248571Smm	ASSERT(origin_head->ds_quota == 0 ||
2679248571Smm	    clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota);
2680253820Sdelphij	ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
2681185029Spjd
2682248571Smm	dmu_buf_will_dirty(clone->ds_dbuf, tx);
2683248571Smm	dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
2684185029Spjd
	/*
	 * Drop any cached objsets.  NOTE(review): presumably because they
	 * were opened against the block pointers that are swapped below --
	 * confirm.
	 */
2685248571Smm	if (clone->ds_objset != NULL) {
2686248571Smm		dmu_objset_evict(clone->ds_objset);
2687248571Smm		clone->ds_objset = NULL;
2688185029Spjd	}
2689185029Spjd
2690248571Smm	if (origin_head->ds_objset != NULL) {
2691248571Smm		dmu_objset_evict(origin_head->ds_objset);
2692248571Smm		origin_head->ds_objset = NULL;
2693185029Spjd	}
2694185029Spjd
	/*
	 * Must be computed here, from the pre-swap ds_unique_bytes values;
	 * it is applied after the counters have been swapped (see below).
	 */
2695248571Smm	unused_refres_delta =
2696248571Smm	    (int64_t)MIN(origin_head->ds_reserved,
2697248571Smm	    origin_head->ds_phys->ds_unique_bytes) -
2698248571Smm	    (int64_t)MIN(origin_head->ds_reserved,
2699248571Smm	    clone->ds_phys->ds_unique_bytes);
2700248571Smm
2701219089Spjd	/*
2702219089Spjd	 * Reset origin's unique bytes, if it exists.
2703219089Spjd	 */
2704248571Smm	if (clone->ds_prev) {
2705248571Smm		dsl_dataset_t *origin = clone->ds_prev;
2706219089Spjd		uint64_t comp, uncomp;
2707185029Spjd
2708219089Spjd		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		/* comp/uncomp are required outputs but are not used here */
2709248571Smm		dsl_deadlist_space_range(&clone->ds_deadlist,
2710219089Spjd		    origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
2711219089Spjd		    &origin->ds_phys->ds_unique_bytes, &comp, &uncomp);
2712219089Spjd	}
2713219089Spjd
2714185029Spjd	/* swap blkptrs */
2715185029Spjd	{
2716185029Spjd		blkptr_t tmp;
2717248571Smm		tmp = origin_head->ds_phys->ds_bp;
2718248571Smm		origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp;
2719248571Smm		clone->ds_phys->ds_bp = tmp;
2720185029Spjd	}
2721185029Spjd
2722185029Spjd	/* set dd_*_bytes */
2723185029Spjd	{
2724185029Spjd		int64_t dused, dcomp, duncomp;
2725185029Spjd		uint64_t cdl_used, cdl_comp, cdl_uncomp;
2726185029Spjd		uint64_t odl_used, odl_comp, odl_uncomp;
2727185029Spjd
2728248571Smm		ASSERT3U(clone->ds_dir->dd_phys->
2729185029Spjd		    dd_used_breakdown[DD_USED_SNAP], ==, 0);
2730185029Spjd
2731248571Smm		dsl_deadlist_space(&clone->ds_deadlist,
2732219089Spjd		    &cdl_used, &cdl_comp, &cdl_uncomp);
2733248571Smm		dsl_deadlist_space(&origin_head->ds_deadlist,
2734219089Spjd		    &odl_used, &odl_comp, &odl_uncomp);
2735185029Spjd
		/*
		 * Per-dataset total = referenced + deadlist space; the
		 * deltas are (clone total) - (origin_head total).  These
		 * still use the pre-swap ds_*_bytes counters.
		 */
2736248571Smm		dused = clone->ds_phys->ds_referenced_bytes + cdl_used -
2737248571Smm		    (origin_head->ds_phys->ds_referenced_bytes + odl_used);
2738248571Smm		dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp -
2739248571Smm		    (origin_head->ds_phys->ds_compressed_bytes + odl_comp);
2740248571Smm		duncomp = clone->ds_phys->ds_uncompressed_bytes +
2741185029Spjd		    cdl_uncomp -
2742248571Smm		    (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp);
2743185029Spjd
		/* origin_head's dir gains the delta; the clone's dir loses it */
2744248571Smm		dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD,
2745185029Spjd		    dused, dcomp, duncomp, tx);
2746248571Smm		dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD,
2747185029Spjd		    -dused, -dcomp, -duncomp, tx);
2748185029Spjd
2749185029Spjd		/*
2750185029Spjd		 * The difference in the space used by snapshots is the
2751185029Spjd		 * difference in snapshot space due to the head's
2752185029Spjd		 * deadlist (since that's the only thing that's
2753185029Spjd		 * changing that affects the snapused).
2754185029Spjd		 */
2755248571Smm		dsl_deadlist_space_range(&clone->ds_deadlist,
2756248571Smm		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
2757219089Spjd		    &cdl_used, &cdl_comp, &cdl_uncomp);
2758248571Smm		dsl_deadlist_space_range(&origin_head->ds_deadlist,
2759248571Smm		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
2760219089Spjd		    &odl_used, &odl_comp, &odl_uncomp);
2761248571Smm		dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used,
2762185029Spjd		    DD_USED_HEAD, DD_USED_SNAP, tx);
2763185029Spjd	}
2764185029Spjd
2765185029Spjd	/* swap ds_*_bytes */
2766248571Smm	SWITCH64(origin_head->ds_phys->ds_referenced_bytes,
2767248571Smm	    clone->ds_phys->ds_referenced_bytes);
2768248571Smm	SWITCH64(origin_head->ds_phys->ds_compressed_bytes,
2769248571Smm	    clone->ds_phys->ds_compressed_bytes);
2770248571Smm	SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes,
2771248571Smm	    clone->ds_phys->ds_uncompressed_bytes);
2772248571Smm	SWITCH64(origin_head->ds_phys->ds_unique_bytes,
2773248571Smm	    clone->ds_phys->ds_unique_bytes);
2774185029Spjd
2775185029Spjd	/* apply any parent delta for change in unconsumed refreservation */
2776248571Smm	dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV,
2777248571Smm	    unused_refres_delta, 0, 0, tx);
2778185029Spjd
2779219089Spjd	/*
2780219089Spjd	 * Swap deadlists.
2781219089Spjd	 */
	/* close both, exchange the on-disk object numbers, then reopen */
2782248571Smm	dsl_deadlist_close(&clone->ds_deadlist);
2783248571Smm	dsl_deadlist_close(&origin_head->ds_deadlist);
2784248571Smm	SWITCH64(origin_head->ds_phys->ds_deadlist_obj,
2785248571Smm	    clone->ds_phys->ds_deadlist_obj);
2786248571Smm	dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset,
2787248571Smm	    clone->ds_phys->ds_deadlist_obj);
2788248571Smm	dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,
2789248571Smm	    origin_head->ds_phys->ds_deadlist_obj);
2790208047Smm
2791248571Smm	dsl_scan_ds_clone_swapped(origin_head, clone, tx);
2792185029Spjd
2793248571Smm	spa_history_log_internal_ds(clone, "clone swap", tx,
2794248571Smm	    "parent=%s", origin_head->ds_dir->dd_myname);
2795185029Spjd}
2796185029Spjd
2797185029Spjd/*
2798168404Spjd * Given a pool name and a dataset object number in that pool,
2799168404Spjd * return the name of that dataset.
2800168404Spjd */
2801168404Spjdint
2802168404Spjddsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
2803168404Spjd{
2804168404Spjd	dsl_pool_t *dp;
2805185029Spjd	dsl_dataset_t *ds;
2806168404Spjd	int error;
2807168404Spjd
2808248571Smm	error = dsl_pool_hold(pname, FTAG, &dp);
2809248571Smm	if (error != 0)
2810168404Spjd		return (error);
2811248571Smm
2812248571Smm	error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
2813248571Smm	if (error == 0) {
2814185029Spjd		dsl_dataset_name(ds, buf);
2815185029Spjd		dsl_dataset_rele(ds, FTAG);
2816168404Spjd	}
2817248571Smm	dsl_pool_rele(dp, FTAG);
2818168404Spjd
2819185029Spjd	return (error);
2820185029Spjd}
2821185029Spjd
2822185029Spjdint
2823185029Spjddsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
2824185029Spjd    uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
2825185029Spjd{
2826185029Spjd	int error = 0;
2827185029Spjd
2828185029Spjd	ASSERT3S(asize, >, 0);
2829185029Spjd
2830185029Spjd	/*
2831185029Spjd	 * *ref_rsrv is the portion of asize that will come from any
2832185029Spjd	 * unconsumed refreservation space.
2833185029Spjd	 */
2834185029Spjd	*ref_rsrv = 0;
2835185029Spjd
2836185029Spjd	mutex_enter(&ds->ds_lock);
2837185029Spjd	/*
2838185029Spjd	 * Make a space adjustment for reserved bytes.
2839185029Spjd	 */
2840185029Spjd	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
2841185029Spjd		ASSERT3U(*used, >=,
2842185029Spjd		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2843185029Spjd		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2844185029Spjd		*ref_rsrv =
2845185029Spjd		    asize - MIN(asize, parent_delta(ds, asize + inflight));
2846185029Spjd	}
2847185029Spjd
2848185029Spjd	if (!check_quota || ds->ds_quota == 0) {
2849185029Spjd		mutex_exit(&ds->ds_lock);
2850185029Spjd		return (0);
2851185029Spjd	}
2852185029Spjd	/*
2853185029Spjd	 * If they are requesting more space, and our current estimate
2854185029Spjd	 * is over quota, they get to try again unless the actual
2855185029Spjd	 * on-disk is over quota and there are no pending changes (which
2856185029Spjd	 * may free up space for us).
2857185029Spjd	 */
2858236884Smm	if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
2859236884Smm		if (inflight > 0 ||
2860236884Smm		    ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
2861249195Smm			error = SET_ERROR(ERESTART);
2862185029Spjd		else
2863249195Smm			error = SET_ERROR(EDQUOT);
2864185029Spjd	}
2865185029Spjd	mutex_exit(&ds->ds_lock);
2866185029Spjd
2867185029Spjd	return (error);
2868185029Spjd}
2869185029Spjd
/*
 * Argument block handed to the check and sync callbacks of the refquota
 * and refreservation sync tasks.
 */
2870248571Smmtypedef struct dsl_dataset_set_qr_arg {
2871248571Smm	const char *ddsqra_name;	/* name of the dataset to change */
2872248571Smm	zprop_source_t ddsqra_source;	/* property source for the new value */
2873248571Smm	uint64_t ddsqra_value;		/* requested property value */
2874248571Smm} dsl_dataset_set_qr_arg_t;
2875248571Smm
2876248571Smm
2877185029Spjd/* ARGSUSED */
2878185029Spjdstatic int
2879248571Smmdsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
2880185029Spjd{
2881248571Smm	dsl_dataset_set_qr_arg_t *ddsqra = arg;
2882248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2883248571Smm	dsl_dataset_t *ds;
2884248571Smm	int error;
2885248571Smm	uint64_t newval;
2886185029Spjd
2887248571Smm	if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
2888249195Smm		return (SET_ERROR(ENOTSUP));
2889185029Spjd
2890248571Smm	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2891248571Smm	if (error != 0)
2892248571Smm		return (error);
2893219089Spjd
2894248571Smm	if (dsl_dataset_is_snapshot(ds)) {
2895248571Smm		dsl_dataset_rele(ds, FTAG);
2896249195Smm		return (SET_ERROR(EINVAL));
2897248571Smm	}
2898248571Smm
2899248571Smm	error = dsl_prop_predict(ds->ds_dir,
2900248571Smm	    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
2901248571Smm	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2902248571Smm	if (error != 0) {
2903248571Smm		dsl_dataset_rele(ds, FTAG);
2904248571Smm		return (error);
2905248571Smm	}
2906248571Smm
2907248571Smm	if (newval == 0) {
2908248571Smm		dsl_dataset_rele(ds, FTAG);
2909185029Spjd		return (0);
2910248571Smm	}
2911185029Spjd
2912248571Smm	if (newval < ds->ds_phys->ds_referenced_bytes ||
2913248571Smm	    newval < ds->ds_reserved) {
2914248571Smm		dsl_dataset_rele(ds, FTAG);
2915249195Smm		return (SET_ERROR(ENOSPC));
2916248571Smm	}
2917185029Spjd
2918248571Smm	dsl_dataset_rele(ds, FTAG);
2919168404Spjd	return (0);
2920168404Spjd}
2921185029Spjd
2922248571Smmstatic void
2923248571Smmdsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx)
2924185029Spjd{
2925248571Smm	dsl_dataset_set_qr_arg_t *ddsqra = arg;
2926248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2927248571Smm	dsl_dataset_t *ds;
2928248571Smm	uint64_t newval;
2929185029Spjd
2930248571Smm	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
2931185029Spjd
2932248571Smm	dsl_prop_set_sync_impl(ds,
2933248571Smm	    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
2934248571Smm	    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
2935248571Smm	    &ddsqra->ddsqra_value, tx);
2936248571Smm
2937248571Smm	VERIFY0(dsl_prop_get_int_ds(ds,
2938248571Smm	    zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval));
2939248571Smm
2940248571Smm	if (ds->ds_quota != newval) {
2941219089Spjd		dmu_buf_will_dirty(ds->ds_dbuf, tx);
2942248571Smm		ds->ds_quota = newval;
2943219089Spjd	}
2944248571Smm	dsl_dataset_rele(ds, FTAG);
2945185029Spjd}
2946185029Spjd
2947185029Spjdint
2948248571Smmdsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
2949248571Smm    uint64_t refquota)
2950185029Spjd{
2951248571Smm	dsl_dataset_set_qr_arg_t ddsqra;
2952185029Spjd
2953248571Smm	ddsqra.ddsqra_name = dsname;
2954248571Smm	ddsqra.ddsqra_source = source;
2955248571Smm	ddsqra.ddsqra_value = refquota;
2956219089Spjd
2957248571Smm	return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
2958269006Sdelphij	    dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
2959185029Spjd}
2960185029Spjd
2961185029Spjdstatic int
2962248571Smmdsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
2963185029Spjd{
2964248571Smm	dsl_dataset_set_qr_arg_t *ddsqra = arg;
2965248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
2966248571Smm	dsl_dataset_t *ds;
2967248571Smm	int error;
2968248571Smm	uint64_t newval, unique;
2969185029Spjd
2970248571Smm	if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
2971249195Smm		return (SET_ERROR(ENOTSUP));
2972185029Spjd
2973248571Smm	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2974248571Smm	if (error != 0)
2975248571Smm		return (error);
2976248571Smm
2977248571Smm	if (dsl_dataset_is_snapshot(ds)) {
2978248571Smm		dsl_dataset_rele(ds, FTAG);
2979249195Smm		return (SET_ERROR(EINVAL));
2980248571Smm	}
2981185029Spjd
2982248571Smm	error = dsl_prop_predict(ds->ds_dir,
2983248571Smm	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
2984248571Smm	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2985248571Smm	if (error != 0) {
2986248571Smm		dsl_dataset_rele(ds, FTAG);
2987248571Smm		return (error);
2988248571Smm	}
2989219089Spjd
2990185029Spjd	/*
2991185029Spjd	 * If we are doing the preliminary check in open context, the
2992185029Spjd	 * space estimates may be inaccurate.
2993185029Spjd	 */
2994248571Smm	if (!dmu_tx_is_syncing(tx)) {
2995248571Smm		dsl_dataset_rele(ds, FTAG);
2996185029Spjd		return (0);
2997248571Smm	}
2998185029Spjd
2999185029Spjd	mutex_enter(&ds->ds_lock);
3000219089Spjd	if (!DS_UNIQUE_IS_ACCURATE(ds))
3001219089Spjd		dsl_dataset_recalc_head_uniq(ds);
3002219089Spjd	unique = ds->ds_phys->ds_unique_bytes;
3003185029Spjd	mutex_exit(&ds->ds_lock);
3004185029Spjd
3005248571Smm	if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) {
3006248571Smm		uint64_t delta = MAX(unique, newval) -
3007209962Smm		    MAX(unique, ds->ds_reserved);
3008185029Spjd
3009248571Smm		if (delta >
3010248571Smm		    dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) ||
3011248571Smm		    (ds->ds_quota > 0 && newval > ds->ds_quota)) {
3012248571Smm			dsl_dataset_rele(ds, FTAG);
3013249195Smm			return (SET_ERROR(ENOSPC));
3014248571Smm		}
3015209962Smm	}
3016209962Smm
3017248571Smm	dsl_dataset_rele(ds, FTAG);
3018185029Spjd	return (0);
3019185029Spjd}
3020185029Spjd
3021248571Smmvoid
3022248571Smmdsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
3023248571Smm    zprop_source_t source, uint64_t value, dmu_tx_t *tx)
3024185029Spjd{
3025248571Smm	uint64_t newval;
3026185029Spjd	uint64_t unique;
3027185029Spjd	int64_t delta;
3028185029Spjd
3029248571Smm	dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
3030248571Smm	    source, sizeof (value), 1, &value, tx);
3031219089Spjd
3032248571Smm	VERIFY0(dsl_prop_get_int_ds(ds,
3033248571Smm	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval));
3034248571Smm
3035185029Spjd	dmu_buf_will_dirty(ds->ds_dbuf, tx);
3036185029Spjd	mutex_enter(&ds->ds_dir->dd_lock);
3037185029Spjd	mutex_enter(&ds->ds_lock);
3038219089Spjd	ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
3039219089Spjd	unique = ds->ds_phys->ds_unique_bytes;
3040248571Smm	delta = MAX(0, (int64_t)(newval - unique)) -
3041185029Spjd	    MAX(0, (int64_t)(ds->ds_reserved - unique));
3042248571Smm	ds->ds_reserved = newval;
3043185029Spjd	mutex_exit(&ds->ds_lock);
3044185029Spjd
3045185029Spjd	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
3046185029Spjd	mutex_exit(&ds->ds_dir->dd_lock);
3047185029Spjd}
3048185029Spjd
3049248571Smmstatic void
3050248571Smmdsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx)
3051185029Spjd{
3052248571Smm	dsl_dataset_set_qr_arg_t *ddsqra = arg;
3053248571Smm	dsl_pool_t *dp = dmu_tx_pool(tx);
3054185029Spjd	dsl_dataset_t *ds;
3055185029Spjd
3056248571Smm	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
3057248571Smm	dsl_dataset_set_refreservation_sync_impl(ds,
3058248571Smm	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx);
3059185029Spjd	dsl_dataset_rele(ds, FTAG);
3060185029Spjd}
3061219089Spjd
3062219089Spjdint
3063248571Smmdsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
3064248571Smm    uint64_t refreservation)
3065219089Spjd{
3066248571Smm	dsl_dataset_set_qr_arg_t ddsqra;
3067219089Spjd
3068248571Smm	ddsqra.ddsqra_name = dsname;
3069248571Smm	ddsqra.ddsqra_source = source;
3070248571Smm	ddsqra.ddsqra_value = refreservation;
3071219089Spjd
3072248571Smm	return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check,
3073269006Sdelphij	    dsl_dataset_set_refreservation_sync, &ddsqra,
3074269006Sdelphij	    0, ZFS_SPACE_CHECK_NONE));
3075219089Spjd}
3076219089Spjd
3077219089Spjd/*
3078228103Smm * Return (in *usedp) the amount of space written in new that is not
3079228103Smm * present in oldsnap.  New may be a snapshot or the head.  Old must be
3080228103Smm * a snapshot before new, in new's filesystem (or its origin).  If not then
3081228103Smm * fail and return EINVAL.
3082228103Smm *
3083228103Smm * The written space is calculated by considering two components:  First, we
3084228103Smm * ignore any freed space, and calculate the written as new's used space
3085228103Smm * minus old's used space.  Next, we add in the amount of space that was freed
3086228103Smm * between the two snapshots, thus reducing new's used space relative to old's.
3087228103Smm * Specifically, this is the space that was born before old->ds_creation_txg,
3088228103Smm * and freed before new (ie. on new's deadlist or a previous deadlist).
3089228103Smm *
3090228103Smm * space freed                         [---------------------]
3091228103Smm * snapshots                       ---O-------O--------O-------O------
3092228103Smm *                                         oldsnap            new
3093228103Smm */
3094228103Smmint
3095228103Smmdsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
3096228103Smm    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
3097228103Smm{
3098228103Smm	int err = 0;
3099228103Smm	uint64_t snapobj;
3100228103Smm	dsl_pool_t *dp = new->ds_dir->dd_pool;
3101228103Smm
3102248571Smm	ASSERT(dsl_pool_config_held(dp));
3103248571Smm
3104228103Smm	*usedp = 0;
3105236884Smm	*usedp += new->ds_phys->ds_referenced_bytes;
3106236884Smm	*usedp -= oldsnap->ds_phys->ds_referenced_bytes;
3107228103Smm
3108228103Smm	*compp = 0;
3109228103Smm	*compp += new->ds_phys->ds_compressed_bytes;
3110228103Smm	*compp -= oldsnap->ds_phys->ds_compressed_bytes;
3111228103Smm
3112228103Smm	*uncompp = 0;
3113228103Smm	*uncompp += new->ds_phys->ds_uncompressed_bytes;
3114228103Smm	*uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
3115228103Smm
3116228103Smm	snapobj = new->ds_object;
3117228103Smm	while (snapobj != oldsnap->ds_object) {
3118228103Smm		dsl_dataset_t *snap;
3119228103Smm		uint64_t used, comp, uncomp;
3120228103Smm
3121236884Smm		if (snapobj == new->ds_object) {
3122236884Smm			snap = new;
3123236884Smm		} else {
3124236884Smm			err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
3125236884Smm			if (err != 0)
3126236884Smm				break;
3127236884Smm		}
3128228103Smm
3129228103Smm		if (snap->ds_phys->ds_prev_snap_txg ==
3130228103Smm		    oldsnap->ds_phys->ds_creation_txg) {
3131228103Smm			/*
3132228103Smm			 * The blocks in the deadlist can not be born after
3133228103Smm			 * ds_prev_snap_txg, so get the whole deadlist space,
3134228103Smm			 * which is more efficient (especially for old-format
3135228103Smm			 * deadlists).  Unfortunately the deadlist code
3136228103Smm			 * doesn't have enough information to make this
3137228103Smm			 * optimization itself.
3138228103Smm			 */
3139228103Smm			dsl_deadlist_space(&snap->ds_deadlist,
3140228103Smm			    &used, &comp, &uncomp);
3141228103Smm		} else {
3142228103Smm			dsl_deadlist_space_range(&snap->ds_deadlist,
3143228103Smm			    0, oldsnap->ds_phys->ds_creation_txg,
3144228103Smm			    &used, &comp, &uncomp);
3145228103Smm		}
3146228103Smm		*usedp += used;
3147228103Smm		*compp += comp;
3148228103Smm		*uncompp += uncomp;
3149228103Smm
3150228103Smm		/*
3151228103Smm		 * If we get to the beginning of the chain of snapshots
3152228103Smm		 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
3153228103Smm		 * was not a snapshot of/before new.
3154228103Smm		 */
3155228103Smm		snapobj = snap->ds_phys->ds_prev_snap_obj;
3156236884Smm		if (snap != new)
3157236884Smm			dsl_dataset_rele(snap, FTAG);
3158228103Smm		if (snapobj == 0) {
3159249195Smm			err = SET_ERROR(EINVAL);
3160228103Smm			break;
3161228103Smm		}
3162228103Smm
3163228103Smm	}
3164228103Smm	return (err);
3165228103Smm}
3166228103Smm
3167228103Smm/*
3168228103Smm * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
3169228103Smm * lastsnap, and all snapshots in between are deleted.
3170228103Smm *
3171228103Smm * blocks that would be freed            [---------------------------]
3172228103Smm * snapshots                       ---O-------O--------O-------O--------O
3173228103Smm *                                        firstsnap        lastsnap
3174228103Smm *
3175228103Smm * This is the set of blocks that were born after the snap before firstsnap,
3176228103Smm * (birth > firstsnap->prev_snap_txg) and died before the snap after the
3177228103Smm * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
3178228103Smm * We calculate this by iterating over the relevant deadlists (from the snap
3179228103Smm * after lastsnap, backward to the snap after firstsnap), summing up the
3180228103Smm * space on the deadlist that was born after the snap before firstsnap.
3181228103Smm */
3182228103Smmint
3183228103Smmdsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
3184228103Smm    dsl_dataset_t *lastsnap,
3185228103Smm    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
3186228103Smm{
3187228103Smm	int err = 0;
3188228103Smm	uint64_t snapobj;
3189228103Smm	dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
3190228103Smm
3191228103Smm	ASSERT(dsl_dataset_is_snapshot(firstsnap));
3192228103Smm	ASSERT(dsl_dataset_is_snapshot(lastsnap));
3193228103Smm
3194228103Smm	/*
3195228103Smm	 * Check that the snapshots are in the same dsl_dir, and firstsnap
3196228103Smm	 * is before lastsnap.
3197228103Smm	 */
3198228103Smm	if (firstsnap->ds_dir != lastsnap->ds_dir ||
3199228103Smm	    firstsnap->ds_phys->ds_creation_txg >
3200228103Smm	    lastsnap->ds_phys->ds_creation_txg)
3201249195Smm		return (SET_ERROR(EINVAL));
3202228103Smm
3203228103Smm	*usedp = *compp = *uncompp = 0;
3204228103Smm
3205228103Smm	snapobj = lastsnap->ds_phys->ds_next_snap_obj;
3206228103Smm	while (snapobj != firstsnap->ds_object) {
3207228103Smm		dsl_dataset_t *ds;
3208228103Smm		uint64_t used, comp, uncomp;
3209228103Smm
3210228103Smm		err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
3211228103Smm		if (err != 0)
3212228103Smm			break;
3213228103Smm
3214228103Smm		dsl_deadlist_space_range(&ds->ds_deadlist,
3215228103Smm		    firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX,
3216228103Smm		    &used, &comp, &uncomp);
3217228103Smm		*usedp += used;
3218228103Smm		*compp += comp;
3219228103Smm		*uncompp += uncomp;
3220228103Smm
3221228103Smm		snapobj = ds->ds_phys->ds_prev_snap_obj;
3222228103Smm		ASSERT3U(snapobj, !=, 0);
3223228103Smm		dsl_dataset_rele(ds, FTAG);
3224228103Smm	}
3225228103Smm	return (err);
3226228103Smm}
3227248571Smm
3228248571Smm/*
3229248571Smm * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
3230248571Smm * For example, they could both be snapshots of the same filesystem, and
3231248571Smm * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
3232248571Smm * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
3233248571Smm * filesystem.  Or 'earlier' could be the origin's origin.
3234263407Sdelphij *
3235263407Sdelphij * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.
3236248571Smm */
3237248571Smmboolean_t
3238263407Sdelphijdsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
3239263407Sdelphij	uint64_t earlier_txg)
3240248571Smm{
3241248571Smm	dsl_pool_t *dp = later->ds_dir->dd_pool;
3242248571Smm	int error;
3243248571Smm	boolean_t ret;
3244248571Smm
3245248571Smm	ASSERT(dsl_pool_config_held(dp));
3246263407Sdelphij	ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0);
3247248571Smm
3248263407Sdelphij	if (earlier_txg == 0)
3249263407Sdelphij		earlier_txg = earlier->ds_phys->ds_creation_txg;
3250263407Sdelphij
3251263407Sdelphij	if (dsl_dataset_is_snapshot(later) &&
3252263407Sdelphij	    earlier_txg >= later->ds_phys->ds_creation_txg)
3253248571Smm		return (B_FALSE);
3254248571Smm
3255248571Smm	if (later->ds_dir == earlier->ds_dir)
3256248571Smm		return (B_TRUE);
3257248571Smm	if (!dsl_dir_is_clone(later->ds_dir))
3258248571Smm		return (B_FALSE);
3259248571Smm
3260248571Smm	if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object)
3261248571Smm		return (B_TRUE);
3262248571Smm	dsl_dataset_t *origin;
3263248571Smm	error = dsl_dataset_hold_obj(dp,
3264248571Smm	    later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
3265248571Smm	if (error != 0)
3266248571Smm		return (B_FALSE);
3267263407Sdelphij	ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
3268248571Smm	dsl_dataset_rele(origin, FTAG);
3269248571Smm	return (ret);
3270248571Smm}
3271263390Sdelphij
3272263390Sdelphij
3273263390Sdelphijvoid
3274263390Sdelphijdsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)
3275263390Sdelphij{
3276263390Sdelphij	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
3277263390Sdelphij	dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx);
3278263390Sdelphij}
3279