dsl_userhold.c revision 268649
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 */
26
27#include <sys/zfs_context.h>
28#include <sys/dsl_userhold.h>
29#include <sys/dsl_dataset.h>
30#include <sys/dsl_destroy.h>
31#include <sys/dsl_synctask.h>
32#include <sys/dmu_tx.h>
33#include <sys/zfs_onexit.h>
34#include <sys/dsl_pool.h>
35#include <sys/dsl_dir.h>
36#include <sys/zfs_ioctl.h>
37#include <sys/zap.h>
38
39typedef struct dsl_dataset_user_hold_arg {
40	nvlist_t *dduha_holds;
41	nvlist_t *dduha_chkholds;
42	nvlist_t *dduha_errlist;
43	minor_t dduha_minor;
44} dsl_dataset_user_hold_arg_t;
45
46/*
47 * If you add new checks here, you may need to add additional checks to the
48 * "temporary" case in snapshot_check() in dmu_objset.c.
49 */
50int
51dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
52    boolean_t temphold, dmu_tx_t *tx)
53{
54	dsl_pool_t *dp = dmu_tx_pool(tx);
55	objset_t *mos = dp->dp_meta_objset;
56	int error = 0;
57
58	ASSERT(dsl_pool_config_held(dp));
59
60	if (strlen(htag) > MAXNAMELEN)
61		return (SET_ERROR(E2BIG));
62	/* Tempholds have a more restricted length */
63	if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
64		return (SET_ERROR(E2BIG));
65
66	/* tags must be unique (if ds already exists) */
67	if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
68		uint64_t value;
69
70		error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
71		    htag, 8, 1, &value);
72		if (error == 0)
73			error = SET_ERROR(EEXIST);
74		else if (error == ENOENT)
75			error = 0;
76	}
77
78	return (error);
79}
80
81static int
82dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
83{
84	dsl_dataset_user_hold_arg_t *dduha = arg;
85	dsl_pool_t *dp = dmu_tx_pool(tx);
86
87	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
88		return (SET_ERROR(ENOTSUP));
89
90	if (!dmu_tx_is_syncing(tx))
91		return (0);
92
93	for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
94	    pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
95		dsl_dataset_t *ds;
96		int error = 0;
97		char *htag, *name;
98
99		/* must be a snapshot */
100		name = nvpair_name(pair);
101		if (strchr(name, '@') == NULL)
102			error = SET_ERROR(EINVAL);
103
104		if (error == 0)
105			error = nvpair_value_string(pair, &htag);
106
107		if (error == 0)
108			error = dsl_dataset_hold(dp, name, FTAG, &ds);
109
110		if (error == 0) {
111			error = dsl_dataset_user_hold_check_one(ds, htag,
112			    dduha->dduha_minor != 0, tx);
113			dsl_dataset_rele(ds, FTAG);
114		}
115
116		if (error == 0) {
117			fnvlist_add_string(dduha->dduha_chkholds, name, htag);
118		} else {
119			/*
120			 * We register ENOENT errors so they can be correctly
121			 * reported if needed, such as when all holds fail.
122			 */
123			fnvlist_add_int32(dduha->dduha_errlist, name, error);
124			if (error != ENOENT)
125				return (error);
126		}
127	}
128
129	return (0);
130}
131
132
133static void
134dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
135    const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
136{
137	dsl_pool_t *dp = ds->ds_dir->dd_pool;
138	objset_t *mos = dp->dp_meta_objset;
139	uint64_t zapobj;
140
141	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
142
143	if (ds->ds_phys->ds_userrefs_obj == 0) {
144		/*
145		 * This is the first user hold for this dataset.  Create
146		 * the userrefs zap object.
147		 */
148		dmu_buf_will_dirty(ds->ds_dbuf, tx);
149		zapobj = ds->ds_phys->ds_userrefs_obj =
150		    zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
151	} else {
152		zapobj = ds->ds_phys->ds_userrefs_obj;
153	}
154	ds->ds_userrefs++;
155
156	VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
157
158	if (minor != 0) {
159		char name[MAXNAMELEN];
160		nvlist_t *tags;
161
162		VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
163		    htag, now, tx));
164		(void) snprintf(name, sizeof (name), "%llx",
165		    (u_longlong_t)ds->ds_object);
166
167		if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
168			tags = fnvlist_alloc();
169			fnvlist_add_boolean(tags, htag);
170			fnvlist_add_nvlist(tmpholds, name, tags);
171			fnvlist_free(tags);
172		} else {
173			fnvlist_add_boolean(tags, htag);
174		}
175	}
176
177	spa_history_log_internal_ds(ds, "hold", tx,
178	    "tag=%s temp=%d refs=%llu",
179	    htag, minor != 0, ds->ds_userrefs);
180}
181
182typedef struct zfs_hold_cleanup_arg {
183	char zhca_spaname[MAXNAMELEN];
184	uint64_t zhca_spa_load_guid;
185	nvlist_t *zhca_holds;
186} zfs_hold_cleanup_arg_t;
187
188static void
189dsl_dataset_user_release_onexit(void *arg)
190{
191	zfs_hold_cleanup_arg_t *ca = arg;
192	spa_t *spa;
193	int error;
194
195	error = spa_open(ca->zhca_spaname, &spa, FTAG);
196	if (error != 0) {
197		zfs_dbgmsg("couldn't release holds on pool=%s "
198		    "because pool is no longer loaded",
199		    ca->zhca_spaname);
200		return;
201	}
202	if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
203		zfs_dbgmsg("couldn't release holds on pool=%s "
204		    "because pool is no longer loaded (guid doesn't match)",
205		    ca->zhca_spaname);
206		spa_close(spa, FTAG);
207		return;
208	}
209
210	(void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
211	fnvlist_free(ca->zhca_holds);
212	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
213	spa_close(spa, FTAG);
214}
215
216static void
217dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
218{
219	zfs_hold_cleanup_arg_t *ca;
220
221	if (minor == 0 || nvlist_empty(holds)) {
222		fnvlist_free(holds);
223		return;
224	}
225
226	ASSERT(spa != NULL);
227	ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
228
229	(void) strlcpy(ca->zhca_spaname, spa_name(spa),
230	    sizeof (ca->zhca_spaname));
231	ca->zhca_spa_load_guid = spa_load_guid(spa);
232	ca->zhca_holds = holds;
233	VERIFY0(zfs_onexit_add_cb(minor,
234	    dsl_dataset_user_release_onexit, ca, NULL));
235}
236
237void
238dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
239    minor_t minor, uint64_t now, dmu_tx_t *tx)
240{
241	nvlist_t *tmpholds;
242
243	if (minor != 0)
244		tmpholds = fnvlist_alloc();
245	else
246		tmpholds = NULL;
247	dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
248	dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
249}
250
251static void
252dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
253{
254	dsl_dataset_user_hold_arg_t *dduha = arg;
255	dsl_pool_t *dp = dmu_tx_pool(tx);
256	nvlist_t *tmpholds;
257	uint64_t now = gethrestime_sec();
258
259	if (dduha->dduha_minor != 0)
260		tmpholds = fnvlist_alloc();
261	else
262		tmpholds = NULL;
263	for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
264	    pair != NULL;
265	    pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
266		dsl_dataset_t *ds;
267
268		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
269		dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
270		    fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
271		dsl_dataset_rele(ds, FTAG);
272	}
273	dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
274}
275
276/*
277 * The full semantics of this function are described in the comment above
278 * lzc_hold().
279 *
280 * To summarize:
281 * holds is nvl of snapname -> holdname
282 * errlist will be filled in with snapname -> error
283 *
284 * The snaphosts must all be in the same pool.
285 *
286 * Holds for snapshots that don't exist will be skipped.
287 *
288 * If none of the snapshots for requested holds exist then ENOENT will be
289 * returned.
290 *
291 * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
292 * up when the process exits.
293 *
294 * On success all the holds, for snapshots that existed, will be created and 0
295 * will be returned.
296 *
297 * On failure no holds will be created, the errlist will be filled in,
298 * and an errno will returned.
299 *
300 * In all cases the errlist will contain entries for holds where the snapshot
301 * didn't exist.
302 */
303int
304dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
305{
306	dsl_dataset_user_hold_arg_t dduha;
307	nvpair_t *pair;
308	int ret;
309
310	pair = nvlist_next_nvpair(holds, NULL);
311	if (pair == NULL)
312		return (0);
313
314	dduha.dduha_holds = holds;
315	dduha.dduha_chkholds = fnvlist_alloc();
316	dduha.dduha_errlist = errlist;
317	dduha.dduha_minor = cleanup_minor;
318
319	ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
320	    dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
321	fnvlist_free(dduha.dduha_chkholds);
322
323	return (ret);
324}
325
326typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
327    dsl_dataset_t **dsp);
328
329typedef struct dsl_dataset_user_release_arg {
330	dsl_holdfunc_t *ddura_holdfunc;
331	nvlist_t *ddura_holds;
332	nvlist_t *ddura_todelete;
333	nvlist_t *ddura_errlist;
334	nvlist_t *ddura_chkholds;
335} dsl_dataset_user_release_arg_t;
336
337/* Place a dataset hold on the snapshot identified by passed dsobj string */
338static int
339dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
340    dsl_dataset_t **dsp)
341{
342	return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
343}
344
345static int
346dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
347    dsl_dataset_t *ds, nvlist_t *holds, const char *snapname)
348{
349	uint64_t zapobj;
350	nvlist_t *holds_found;
351	objset_t *mos;
352	int numholds;
353
354	if (!dsl_dataset_is_snapshot(ds))
355		return (SET_ERROR(EINVAL));
356
357	if (nvlist_empty(holds))
358		return (0);
359
360	numholds = 0;
361	mos = ds->ds_dir->dd_pool->dp_meta_objset;
362	zapobj = ds->ds_phys->ds_userrefs_obj;
363	holds_found = fnvlist_alloc();
364
365	for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
366	    pair = nvlist_next_nvpair(holds, pair)) {
367		uint64_t tmp;
368		int error;
369		const char *holdname = nvpair_name(pair);
370
371		if (zapobj != 0)
372			error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp);
373		else
374			error = SET_ERROR(ENOENT);
375
376		/*
377		 * Non-existent holds are put on the errlist, but don't
378		 * cause an overall failure.
379		 */
380		if (error == ENOENT) {
381			if (ddura->ddura_errlist != NULL) {
382				char *errtag = kmem_asprintf("%s#%s",
383				    snapname, holdname);
384				fnvlist_add_int32(ddura->ddura_errlist, errtag,
385				    ENOENT);
386				strfree(errtag);
387			}
388			continue;
389		}
390
391		if (error != 0) {
392			fnvlist_free(holds_found);
393			return (error);
394		}
395
396		fnvlist_add_boolean(holds_found, holdname);
397		numholds++;
398	}
399
400	if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
401	    ds->ds_userrefs == numholds) {
402		/* we need to destroy the snapshot as well */
403		if (dsl_dataset_long_held(ds)) {
404			fnvlist_free(holds_found);
405			return (SET_ERROR(EBUSY));
406		}
407		fnvlist_add_boolean(ddura->ddura_todelete, snapname);
408	}
409
410	if (numholds != 0) {
411		fnvlist_add_nvlist(ddura->ddura_chkholds, snapname,
412		    holds_found);
413	}
414	fnvlist_free(holds_found);
415
416	return (0);
417}
418
419static int
420dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
421{
422	dsl_dataset_user_release_arg_t *ddura;
423	dsl_holdfunc_t *holdfunc;
424	dsl_pool_t *dp;
425
426	if (!dmu_tx_is_syncing(tx))
427		return (0);
428
429	dp = dmu_tx_pool(tx);
430
431	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
432
433	ddura = arg;
434	holdfunc = ddura->ddura_holdfunc;
435
436	for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_holds, NULL);
437	    pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
438		int error;
439		dsl_dataset_t *ds;
440		nvlist_t *holds;
441		const char *snapname = nvpair_name(pair);
442
443		error = nvpair_value_nvlist(pair, &holds);
444		if (error != 0)
445			error = (SET_ERROR(EINVAL));
446		else
447			error = holdfunc(dp, snapname, FTAG, &ds);
448		if (error == 0) {
449			error = dsl_dataset_user_release_check_one(ddura, ds,
450			    holds, snapname);
451			dsl_dataset_rele(ds, FTAG);
452		}
453		if (error != 0) {
454			if (ddura->ddura_errlist != NULL) {
455				fnvlist_add_int32(ddura->ddura_errlist,
456				    snapname, error);
457			}
458			/*
459			 * Non-existent snapshots are put on the errlist,
460			 * but don't cause an overall failure.
461			 */
462			if (error != ENOENT)
463				return (error);
464		}
465	}
466
467	return (0);
468}
469
470static void
471dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
472    dmu_tx_t *tx)
473{
474	dsl_pool_t *dp = ds->ds_dir->dd_pool;
475	objset_t *mos = dp->dp_meta_objset;
476
477	for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
478	    pair = nvlist_next_nvpair(holds, pair)) {
479		int error;
480		const char *holdname = nvpair_name(pair);
481
482		/* Remove temporary hold if one exists. */
483		error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx);
484		VERIFY(error == 0 || error == ENOENT);
485
486		VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname,
487		    tx));
488		ds->ds_userrefs--;
489
490		spa_history_log_internal_ds(ds, "release", tx,
491		    "tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs);
492	}
493}
494
495static void
496dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
497{
498	dsl_dataset_user_release_arg_t *ddura = arg;
499	dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
500	dsl_pool_t *dp = dmu_tx_pool(tx);
501
502	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
503
504	for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
505	    pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
506	    pair)) {
507		dsl_dataset_t *ds;
508		const char *name = nvpair_name(pair);
509
510		VERIFY0(holdfunc(dp, name, FTAG, &ds));
511
512		dsl_dataset_user_release_sync_one(ds,
513		    fnvpair_value_nvlist(pair), tx);
514		if (nvlist_exists(ddura->ddura_todelete, name)) {
515			ASSERT(ds->ds_userrefs == 0 &&
516			    ds->ds_phys->ds_num_children == 1 &&
517			    DS_IS_DEFER_DESTROY(ds));
518			dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
519		}
520		dsl_dataset_rele(ds, FTAG);
521	}
522}
523
524/*
525 * The full semantics of this function are described in the comment above
526 * lzc_release().
527 *
528 * To summarize:
529 * Releases holds specified in the nvl holds.
530 *
531 * holds is nvl of snapname -> { holdname, ... }
532 * errlist will be filled in with snapname -> error
533 *
534 * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
535 * otherwise they should be the names of shapshots.
536 *
537 * As a release may cause snapshots to be destroyed this trys to ensure they
538 * aren't mounted.
539 *
540 * The release of non-existent holds are skipped.
541 *
542 * At least one hold must have been released for the this function to succeed
543 * and return 0.
544 */
545static int
546dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
547    dsl_pool_t *tmpdp)
548{
549	dsl_dataset_user_release_arg_t ddura;
550	nvpair_t *pair;
551	char *pool;
552	int error;
553
554	pair = nvlist_next_nvpair(holds, NULL);
555	if (pair == NULL)
556		return (0);
557
558	/*
559	 * The release may cause snapshots to be destroyed; make sure they
560	 * are not mounted.
561	 */
562	if (tmpdp != NULL) {
563		/* Temporary holds are specified by dsobj string. */
564		ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
565		pool = spa_name(tmpdp->dp_spa);
566#ifdef _KERNEL
567		for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
568		    pair = nvlist_next_nvpair(holds, pair)) {
569			dsl_dataset_t *ds;
570
571			dsl_pool_config_enter(tmpdp, FTAG);
572			error = dsl_dataset_hold_obj_string(tmpdp,
573			    nvpair_name(pair), FTAG, &ds);
574			if (error == 0) {
575				char name[MAXNAMELEN];
576				dsl_dataset_name(ds, name);
577				dsl_pool_config_exit(tmpdp, FTAG);
578				dsl_dataset_rele(ds, FTAG);
579				(void) zfs_unmount_snap(name);
580			} else {
581				dsl_pool_config_exit(tmpdp, FTAG);
582			}
583		}
584#endif
585	} else {
586		/* Non-temporary holds are specified by name. */
587		ddura.ddura_holdfunc = dsl_dataset_hold;
588		pool = nvpair_name(pair);
589#ifdef _KERNEL
590		for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
591		    pair = nvlist_next_nvpair(holds, pair)) {
592			(void) zfs_unmount_snap(nvpair_name(pair));
593		}
594#endif
595	}
596
597	ddura.ddura_holds = holds;
598	ddura.ddura_errlist = errlist;
599	ddura.ddura_todelete = fnvlist_alloc();
600	ddura.ddura_chkholds = fnvlist_alloc();
601
602	error = dsl_sync_task(pool, dsl_dataset_user_release_check,
603	    dsl_dataset_user_release_sync, &ddura, 0);
604	fnvlist_free(ddura.ddura_todelete);
605	fnvlist_free(ddura.ddura_chkholds);
606
607	return (error);
608}
609
610/*
611 * holds is nvl of snapname -> { holdname, ... }
612 * errlist will be filled in with snapname -> error
613 */
614int
615dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
616{
617	return (dsl_dataset_user_release_impl(holds, errlist, NULL));
618}
619
620/*
621 * holds is nvl of snapdsobj -> { holdname, ... }
622 */
623void
624dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
625{
626	ASSERT(dp != NULL);
627	(void) dsl_dataset_user_release_impl(holds, NULL, dp);
628}
629
630int
631dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
632{
633	dsl_pool_t *dp;
634	dsl_dataset_t *ds;
635	int err;
636
637	err = dsl_pool_hold(dsname, FTAG, &dp);
638	if (err != 0)
639		return (err);
640	err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
641	if (err != 0) {
642		dsl_pool_rele(dp, FTAG);
643		return (err);
644	}
645
646	if (ds->ds_phys->ds_userrefs_obj != 0) {
647		zap_attribute_t *za;
648		zap_cursor_t zc;
649
650		za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
651		for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
652		    ds->ds_phys->ds_userrefs_obj);
653		    zap_cursor_retrieve(&zc, za) == 0;
654		    zap_cursor_advance(&zc)) {
655			fnvlist_add_uint64(nvl, za->za_name,
656			    za->za_first_integer);
657		}
658		zap_cursor_fini(&zc);
659		kmem_free(za, sizeof (zap_attribute_t));
660	}
661	dsl_dataset_rele(ds, FTAG);
662	dsl_pool_rele(dp, FTAG);
663	return (0);
664}
665