dsl_destroy.c revision 263407
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>

typedef struct dmu_snapshots_destroy_arg {
	nvlist_t *dsda_snaps;
	nvlist_t *dsda_successful_snaps;
	boolean_t dsda_defer;
	nvlist_t *dsda_errlist;
} dmu_snapshots_destroy_arg_t;
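
/*
 * A note on the fields above: dsda_snaps holds the caller's snapshots
 * (the full snapshot names are the nvpair names; the boolean values
 * carry no information).  During the check phase below, each name that
 * passes validation is added to dsda_successful_snaps, and each name
 * that fails is recorded in dsda_errlist as a (name, errno) pair.
 */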

int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
	if (!dsl_dataset_is_snapshot(ds))
		return (SET_ERROR(EINVAL));

	if (dsl_dataset_long_held(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (SET_ERROR(ENOTSUP));
		return (0);
	}

	/*
	 * If this snapshot has an elevated user reference count,
	 * we can't destroy it yet.
	 */
	if (ds->ds_userrefs > 0)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete a branch point.
	 */
	if (ds->ds_phys->ds_num_children > 1)
		return (SET_ERROR(EEXIST));

	return (0);
}

static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int error = 0;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
		dsl_dataset_t *ds;

		error = dsl_dataset_hold(dp, nvpair_name(pair),
		    FTAG, &ds);

		/*
		 * If the snapshot does not exist, silently ignore it
		 * (it's "already destroyed").
		 */
		if (error == ENOENT)
			continue;

		if (error == 0) {
			error = dsl_destroy_snapshot_check_impl(ds,
			    dsda->dsda_defer);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error == 0) {
			fnvlist_add_boolean(dsda->dsda_successful_snaps,
			    nvpair_name(pair));
		} else {
			fnvlist_add_int32(dsda->dsda_errlist,
			    nvpair_name(pair), error);
		}
	}

	pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
	if (pair != NULL)
		return (fnvpair_value_int32(pair));

	return (0);
}
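
/*
 * Note that the per-snapshot validation above runs only in syncing
 * context: when the check is invoked in open context,
 * !dmu_tx_is_syncing() is true and it returns success immediately, so
 * the final go/no-go decision is made just before the sync task runs.
 */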

struct process_old_arg {
	dsl_dataset_t *ds;
	dsl_dataset_t *ds_prev;
	boolean_t after_branch_point;
	zio_t *pio;
	uint64_t used, comp, uncomp;
};

static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

	ASSERT(!BP_IS_HOLE(bp));

	if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
		if (poa->ds_prev && !poa->after_branch_point &&
		    bp->blk_birth >
		    poa->ds_prev->ds_phys->ds_prev_snap_txg) {
			poa->ds_prev->ds_phys->ds_unique_bytes +=
			    bp_get_dsize_sync(dp->dp_spa, bp);
		}
	} else {
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
	}
	return (0);
}

static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
	struct process_old_arg poa = { 0 };
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t deadlist_obj;

	ASSERT(ds->ds_deadlist.dl_oldfmt);
	ASSERT(ds_next->ds_deadlist.dl_oldfmt);

	poa.ds = ds;
	poa.ds_prev = ds_prev;
	poa.after_branch_point = after_branch_point;
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	VERIFY0(zio_wait(poa.pio));
	ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);

	/* change snapused */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/* swap next's deadlist to our deadlist */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);
	deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj;
	ds_next->ds_phys->ds_deadlist_obj = deadlist_obj;
	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    ds_next->ds_phys->ds_deadlist_obj);
}
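
/*
 * A note on the swap above: old-format deadlists cannot be merged in
 * place, so process_old_cb() walks next's deadlist once, re-inserting
 * blocks that predate ds's previous snapshot into ds's deadlist and
 * freeing the rest.  The two deadlist objects then trade places, so
 * ds_next ends up owning the combined list while ds is left holding
 * next's old object, which the caller frees.
 */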

static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;

	/*
	 * If it is the old version, dd_clones doesn't exist so we can't
	 * find the clones, but dsl_deadlist_remove_key() is a no-op so it
	 * doesn't matter.
	 */
	if (ds->ds_dir->dd_phys->dd_clones == 0)
		return;

	for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;

		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone));
		if (clone->ds_dir->dd_origin_txg > mintxg) {
			dsl_deadlist_remove_key(&clone->ds_deadlist,
			    mintxg, tx);
			dsl_dataset_remove_clones_key(clone, mintxg, tx);
		}
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
}

void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
	ASSERT(refcount_is_zero(&ds->ds_longholds));

	if (defer &&
	    (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
		return;
	}

	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		ASSERT3P(ds->ds_prev, ==, NULL);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY0(zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	dsl_dataset_t *ds_next;
	uint64_t old_unique;
	uint64_t used = 0, comp = 0, uncomp = 0;

	VERIFY0(dsl_dataset_hold_obj(dp,
	    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
	ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

	old_unique = ds_next->ds_phys->ds_unique_bytes;

	dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
	ds_next->ds_phys->ds_prev_snap_obj =
	    ds->ds_phys->ds_prev_snap_obj;
	ds_next->ds_phys->ds_prev_snap_txg =
	    ds->ds_phys->ds_prev_snap_txg;
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
	    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

	if (ds_next->ds_deadlist.dl_oldfmt) {
		process_old_deadlist(ds, ds_prev, ds_next,
		    after_branch_point, tx);
	} else {
		/* Adjust prev's unique space. */
		if (ds_prev && !after_branch_point) {
			dsl_deadlist_space_range(&ds_next->ds_deadlist,
			    ds_prev->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_prev_snap_txg,
			    &used, &comp, &uncomp);
			ds_prev->ds_phys->ds_unique_bytes += used;
		}

		/* Adjust snapused. */
		dsl_deadlist_space_range(&ds_next->ds_deadlist,
		    ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
		    &used, &comp, &uncomp);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -comp, -uncomp, tx);

		/* Move blocks to be freed to pool's free list. */
		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
		    &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
		    tx);
		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
		    DD_USED_HEAD, used, comp, uncomp, tx);

		/* Merge our deadlist into next's and free it. */
		dsl_deadlist_merge(&ds_next->ds_deadlist,
		    ds->ds_phys->ds_deadlist_obj, tx);
	}
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_deadlist_obj = 0;

	/* Collapse range in clone heads */
	dsl_dataset_remove_clones_key(ds,
	    ds->ds_phys->ds_creation_txg, tx);

	if (dsl_dataset_is_snapshot(ds_next)) {
		dsl_dataset_t *ds_nextnext;

		/*
		 * Update next's unique to include blocks which
		 * were previously shared by only this snapshot
		 * and it.  Those blocks were born after the
		 * prev snap and before this snap, and died
		 * after the next snap and before the one after
		 * that (i.e. they are on the deadlist of the
		 * snap after next).
		 */
		VERIFY0(dsl_dataset_hold_obj(dp,
		    ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext));
		dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
		    ds->ds_phys->ds_prev_snap_txg,
		    ds->ds_phys->ds_creation_txg,
		    &used, &comp, &uncomp);
		ds_next->ds_phys->ds_unique_bytes += used;
		dsl_dataset_rele(ds_nextnext, FTAG);
		ASSERT3P(ds_next->ds_prev, ==, NULL);

		/* Collapse range in this head. */
		dsl_dataset_t *hds;
		VERIFY0(dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds));
		dsl_deadlist_remove_key(&hds->ds_deadlist,
		    ds->ds_phys->ds_creation_txg, tx);
		dsl_dataset_rele(hds, FTAG);

	} else {
		ASSERT3P(ds_next->ds_prev, ==, ds);
		dsl_dataset_rele(ds_next->ds_prev, ds_next);
		ds_next->ds_prev = NULL;
		if (ds_prev) {
			VERIFY0(dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj,
			    ds_next, &ds_next->ds_prev));
		}

		dsl_dataset_recalc_head_uniq(ds_next);

		/*
		 * Reduce the amount of our unconsumed refreservation
		 * being charged to our parent by the amount of
		 * new unique data we have gained.
		 */
		if (old_unique < ds_next->ds_reserved) {
			int64_t mrsdelta;
			uint64_t new_unique =
			    ds_next->ds_phys->ds_unique_bytes;

			ASSERT(old_unique <= new_unique);
			mrsdelta = MIN(new_unique - old_unique,
			    ds_next->ds_reserved - old_unique);
			dsl_dir_diduse_space(ds->ds_dir,
			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
		}
	}
	dsl_dataset_rele(ds_next, FTAG);

	/*
	 * Evict the objset, if it is open, now that nothing else
	 * references it; the dataset object itself is freed below.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* remove from snapshot namespace */
	dsl_dataset_t *ds_head;
	ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
	VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
	{
		int err;
		uint64_t val;

		err = dsl_dataset_snap_lookup(ds_head,
		    ds->ds_snapname, &val);
		ASSERT0(err);
		ASSERT3U(val, ==, obj);
	}
#endif
	VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx));
	dsl_dataset_rele(ds_head, FTAG);

	if (ds_prev != NULL)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT0(zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count));
		ASSERT0(count);
		VERIFY0(dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	if (ds->ds_phys->ds_userrefs_obj != 0)
		VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);
}

static void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
	    pair != NULL;
	    pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
		dsl_dataset_t *ds;

		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));

		dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
		dsl_dataset_rele(ds, FTAG);
	}
}

/*
 * The semantics of this function are described in the comment above
 * lzc_destroy_snaps().  To summarize:
 *
 * The snapshots must all be in the same pool.
 *
 * Snapshots that don't exist will be silently ignored (considered to be
 * "already deleted").
 *
 * On success, all snaps will be destroyed and this will return 0.
 * On failure, no snaps will be destroyed, the errlist will be filled in,
 * and this will return an errno.
 */
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
    nvlist_t *errlist)
{
	dmu_snapshots_destroy_arg_t dsda;
	int error;
	nvpair_t *pair;

	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);

	dsda.dsda_snaps = snaps;
	dsda.dsda_successful_snaps = fnvlist_alloc();
	dsda.dsda_defer = defer;
	dsda.dsda_errlist = errlist;

	error = dsl_sync_task(nvpair_name(pair),
	    dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
	    &dsda, 0);
	fnvlist_free(dsda.dsda_successful_snaps);

	return (error);
}
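
/*
 * A minimal usage sketch (the snapshot names are illustrative only):
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "pool/fs@snap1");
 *	fnvlist_add_boolean(snaps, "pool/fs@snap2");
 *	error = dsl_destroy_snapshots_nvl(snaps, B_FALSE, errlist);
 *	fnvlist_free(errlist);
 *	fnvlist_free(snaps);
 *
 * On failure, errlist maps each failed snapshot name to its errno.
 * dsl_destroy_snapshot() below is this same pattern for a single name.
 */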

int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
	int error;
	nvlist_t *nvl = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();

	fnvlist_add_boolean(nvl, name);
	error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
	fnvlist_free(errlist);
	fnvlist_free(nvl);
	return (error);
}

struct killarg {
	dsl_dataset_t *ds;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (BP_IS_HOLE(bp))
		return (0);

	if (zb->zb_level == ZB_ZIL_LEVEL) {
		ASSERT(zilog != NULL);
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
	} else {
		ASSERT(zilog == NULL);
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
	}

	return (0);
}

static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	struct killarg ka;

	/*
	 * Free everything that we point to (that is, everything
	 * born after the previous snapshot, if we are a clone).
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	ka.ds = ds;
	ka.tx = tx;
	VERIFY0(traverse_dataset(ds,
	    ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
	    kill_blkptr, &ka));
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
}

typedef struct dsl_destroy_head_arg {
	const char *ddha_name;
} dsl_destroy_head_arg_t;

int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
	int error;
	uint64_t count;
	objset_t *mos;

	if (dsl_dataset_is_snapshot(ds))
		return (SET_ERROR(EINVAL));

	if (refcount_count(&ds->ds_longholds) != expected_holds)
		return (SET_ERROR(EBUSY));

	mos = ds->ds_dir->dd_pool->dp_meta_objset;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete if there are children of this fs.
	 */
	error = zap_count(mos,
	    ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (error != 0)
		return (error);
	if (count != 0)
		return (SET_ERROR(EEXIST));

	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    ds->ds_prev->ds_phys->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0) {
		/* We need to remove the origin snapshot as well. */
		if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
			return (SET_ERROR(EBUSY));
	}
	return (0);
}
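
/*
 * About the ds_num_children == 2 test above: a snapshot's child count
 * includes its successor in its own filesystem as well as each clone,
 * so a count of two means this dataset is the origin snapshot's only
 * clone.  If that snapshot is already marked for deferred destroy and
 * has no user holds, destroying the head should sweep the origin away
 * too (see the rmorigin handling in dsl_destroy_head_sync_impl()).
 */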

static int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
	if (error != 0)
		return (error);

	error = dsl_destroy_head_check_impl(ds, 0);
	dsl_dataset_rele(ds, FTAG);
	return (error);
}

static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	dd_used_t t;

	ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));

	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

	ASSERT0(dd->dd_phys->dd_head_dataset_obj);

	/*
	 * Remove our reservation. The impl() routine avoids setting the
	 * actual property, which would require the (already destroyed) ds.
	 */
	dsl_dir_set_reservation_sync_impl(dd, 0, tx);

	ASSERT0(dd->dd_phys->dd_used_bytes);
	ASSERT0(dd->dd_phys->dd_reserved);
	for (t = 0; t < DD_USED_NUM; t++)
		ASSERT0(dd->dd_phys->dd_used_breakdown[t]);

	VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
	VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
	VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
	VERIFY0(zap_remove(mos,
	    dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));

	dsl_dir_rele(dd, FTAG);
	dmu_object_free_zapified(mos, ddobj, tx);
}

void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	uint64_t obj, ddobj, prevobj = 0;
	boolean_t rmorigin;

	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
	    DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    ds->ds_prev->ds_phys->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		dsl_dataset_set_refreservation_sync_impl(ds,
		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
		    0, tx);
		ASSERT0(ds->ds_reserved);
	}

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		/* This is a clone */
		ASSERT(ds->ds_prev != NULL);
		ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj);
		ASSERT0(ds->ds_phys->ds_next_snap_obj);

		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    obj, tx);
		}

		ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1);
		ds->ds_prev->ds_phys->ds_num_children--;
	}

	/*
	 * Destroy the deadlist.  Unless it's a clone, the
	 * deadlist should be empty.  (If it's a clone, it's
	 * safe to ignore the deadlist contents.)
	 */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_deadlist_obj = 0;

	objset_t *os;
	VERIFY0(dmu_objset_from_ds(ds, &os));

	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
		old_synchronous_dataset_destroy(ds, tx);
	} else {
		/*
		 * Move the bptree into the pool's list of trees to
		 * clean up and update space accounting information.
		 */
		uint64_t used, comp, uncomp;

		zil_destroy_sync(dmu_objset_zil(os), tx);

		if (!spa_feature_is_active(dp->dp_spa,
		    SPA_FEATURE_ASYNC_DESTROY)) {
			dsl_scan_t *scn = dp->dp_scan;
			spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
			    tx);
			dp->dp_bptree_obj = bptree_alloc(mos, tx);
			VERIFY0(zap_add(mos,
			    DMU_POOL_DIRECTORY_OBJECT,
			    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
			    &dp->dp_bptree_obj, tx));
			ASSERT(!scn->scn_async_destroying);
			scn->scn_async_destroying = B_TRUE;
		}

		used = ds->ds_dir->dd_phys->dd_used_bytes;
		comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
		uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;

		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
		    ds->ds_phys->ds_unique_bytes == used);

		bptree_add(mos, dp->dp_bptree_obj,
		    &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
		    used, comp, uncomp, tx);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    -used, -comp, -uncomp, tx);
		dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
		    used, comp, uncomp, tx);
	}

	if (ds->ds_prev != NULL) {
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			VERIFY0(zap_remove_int(mos,
			    ds->ds_prev->ds_dir->dd_phys->dd_clones,
			    ds->ds_object, tx));
		}
		prevobj = ds->ds_prev->ds_object;
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	/*
	 * This must be done after the traverse_dataset() call in
	 * old_synchronous_dataset_destroy(), because the traversal
	 * re-opens the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* Erase the link in the dir */
	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
	ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
	ddobj = ds->ds_dir->dd_object;
	ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
	VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx));

	if (ds->ds_bookmarks != 0) {
		VERIFY0(zap_destroy(mos,
		    ds->ds_bookmarks, tx));
		spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
	}

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	ASSERT0(ds->ds_phys->ds_next_clones_obj);
	ASSERT0(ds->ds_phys->ds_props_obj);
	ASSERT0(ds->ds_phys->ds_userrefs_obj);
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);

	dsl_dir_destroy_sync(ddobj, tx);

	if (rmorigin) {
		dsl_dataset_t *prev;
		VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
		dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
		dsl_dataset_rele(prev, FTAG);
	}
}

static void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
	dsl_destroy_head_sync_impl(ds, tx);
	dsl_dataset_rele(ds, FTAG);
}

static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_log_internal_ds(ds, "destroy begin", tx, "");
	dsl_dataset_rele(ds, FTAG);
}
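
/*
 * On pools without SPA_FEATURE_ASYNC_DESTROY, dsl_destroy_head() below
 * runs dsl_destroy_head_begin_sync() as a first sync task to set
 * DS_FLAG_INCONSISTENT, frees the bulk of the objects from open
 * context, and only then runs the real destroy sync task.  If the
 * machine crashes in between, the inconsistent flag lets
 * dsl_destroy_inconsistent() finish the job on the next import.
 */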

int
dsl_destroy_head(const char *name)
{
	dsl_destroy_head_arg_t ddha;
	int error;
	spa_t *spa;
	boolean_t isenabled;

#ifdef _KERNEL
	zfs_destroy_unmount_origin(name);
#endif

	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
	spa_close(spa, FTAG);

	ddha.ddha_name = name;

	if (!isenabled) {
		objset_t *os;

		error = dsl_sync_task(name, dsl_destroy_head_check,
		    dsl_destroy_head_begin_sync, &ddha, 0);
		if (error != 0)
			return (error);

		/*
		 * Head deletion is processed in one txg on old pools;
		 * remove the objects from open context so that the txg sync
		 * is not too long.
		 */
		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
		if (error == 0) {
			uint64_t prev_snap_txg =
			    dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg;
			for (uint64_t obj = 0; error == 0;
			    error = dmu_object_next(os, &obj, FALSE,
			    prev_snap_txg))
				(void) dmu_free_long_object(os, obj);
			/* sync out all frees */
			txg_wait_synced(dmu_objset_pool(os), 0);
			dmu_objset_disown(os, FTAG);
		}
	}

	return (dsl_sync_task(name, dsl_destroy_head_check,
	    dsl_destroy_head_sync, &ddha, 0));
}
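
/*
 * A minimal caller sketch (the dataset name is illustrative):
 *
 *	error = dsl_destroy_head("pool/fs");
 *
 * Per the checks above, the name must refer to a head dataset rather
 * than a snapshot (else EINVAL), with no child filesystems (EEXIST)
 * and no snapshots or long holds of its own (EBUSY).
 */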

/*
 * Note: this function is used as the callback for dmu_objset_find().
 * We always return 0 so that we will continue to find and process
 * inconsistent datasets, even if we encounter an error trying to
 * process one of them.
 */
/* ARGSUSED */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
	objset_t *os;

	if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
		boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
		dmu_objset_rele(os, FTAG);
		if (inconsistent)
			(void) dsl_destroy_head(dsname);
	}
	return (0);
}
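
/*
 * A sketch of how this callback is typically wired up, assuming the
 * usual dmu_objset_find() signature (as in the pool-import path):
 *
 *	(void) dmu_objset_find(spa_name(spa),
 *	    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
 */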