dsl_userhold.c revision 248571
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 */
25
26#include <sys/zfs_context.h>
27#include <sys/dsl_userhold.h>
28#include <sys/dsl_dataset.h>
29#include <sys/dsl_destroy.h>
30#include <sys/dsl_synctask.h>
31#include <sys/dmu_tx.h>
32#include <sys/zfs_onexit.h>
33#include <sys/dsl_pool.h>
34#include <sys/dsl_dir.h>
35#include <sys/zfs_ioctl.h>
36#include <sys/zap.h>
37
38typedef struct dsl_dataset_user_hold_arg {
39	nvlist_t *dduha_holds;
40	nvlist_t *dduha_errlist;
41	minor_t dduha_minor;
42} dsl_dataset_user_hold_arg_t;
43
44/*
45 * If you add new checks here, you may need to add additional checks to the
46 * "temporary" case in snapshot_check() in dmu_objset.c.
47 */
48int
49dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
50    boolean_t temphold, dmu_tx_t *tx)
51{
52	dsl_pool_t *dp = dmu_tx_pool(tx);
53	objset_t *mos = dp->dp_meta_objset;
54	int error = 0;
55
56	if (strlen(htag) > MAXNAMELEN)
57		return (E2BIG);
58	/* Tempholds have a more restricted length */
59	if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
60		return (E2BIG);
61
62	/* tags must be unique (if ds already exists) */
63	if (ds != NULL) {
64		mutex_enter(&ds->ds_lock);
65		if (ds->ds_phys->ds_userrefs_obj != 0) {
66			uint64_t value;
67			error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
68			    htag, 8, 1, &value);
69			if (error == 0)
70				error = EEXIST;
71			else if (error == ENOENT)
72				error = 0;
73		}
74		mutex_exit(&ds->ds_lock);
75	}
76
77	return (error);
78}
79
80static int
81dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
82{
83	dsl_dataset_user_hold_arg_t *dduha = arg;
84	dsl_pool_t *dp = dmu_tx_pool(tx);
85	nvpair_t *pair;
86	int rv = 0;
87
88	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
89		return (ENOTSUP);
90
91	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
92	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
93		int error = 0;
94		dsl_dataset_t *ds;
95		char *htag;
96
97		/* must be a snapshot */
98		if (strchr(nvpair_name(pair), '@') == NULL)
99			error = EINVAL;
100
101		if (error == 0)
102			error = nvpair_value_string(pair, &htag);
103		if (error == 0) {
104			error = dsl_dataset_hold(dp,
105			    nvpair_name(pair), FTAG, &ds);
106		}
107		if (error == 0) {
108			error = dsl_dataset_user_hold_check_one(ds, htag,
109			    dduha->dduha_minor != 0, tx);
110			dsl_dataset_rele(ds, FTAG);
111		}
112
113		if (error != 0) {
114			rv = error;
115			fnvlist_add_int32(dduha->dduha_errlist,
116			    nvpair_name(pair), error);
117		}
118	}
119	return (rv);
120}
121
122void
123dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
124    minor_t minor, uint64_t now, dmu_tx_t *tx)
125{
126	dsl_pool_t *dp = ds->ds_dir->dd_pool;
127	objset_t *mos = dp->dp_meta_objset;
128	uint64_t zapobj;
129
130	mutex_enter(&ds->ds_lock);
131	if (ds->ds_phys->ds_userrefs_obj == 0) {
132		/*
133		 * This is the first user hold for this dataset.  Create
134		 * the userrefs zap object.
135		 */
136		dmu_buf_will_dirty(ds->ds_dbuf, tx);
137		zapobj = ds->ds_phys->ds_userrefs_obj =
138		    zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
139	} else {
140		zapobj = ds->ds_phys->ds_userrefs_obj;
141	}
142	ds->ds_userrefs++;
143	mutex_exit(&ds->ds_lock);
144
145	VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
146
147	if (minor != 0) {
148		VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
149		    htag, now, tx));
150		dsl_register_onexit_hold_cleanup(ds, htag, minor);
151	}
152
153	spa_history_log_internal_ds(ds, "hold", tx,
154	    "tag=%s temp=%d refs=%llu",
155	    htag, minor != 0, ds->ds_userrefs);
156}
157
158static void
159dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
160{
161	dsl_dataset_user_hold_arg_t *dduha = arg;
162	dsl_pool_t *dp = dmu_tx_pool(tx);
163	nvpair_t *pair;
164	uint64_t now = gethrestime_sec();
165
166	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
167	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
168		dsl_dataset_t *ds;
169		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
170		dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
171		    dduha->dduha_minor, now, tx);
172		dsl_dataset_rele(ds, FTAG);
173	}
174}
175
176/*
177 * holds is nvl of snapname -> holdname
178 * errlist will be filled in with snapname -> error
179 * if cleanup_minor is not 0, the holds will be temporary, cleaned up
180 * when the process exits.
181 *
182 * if any fails, all will fail.
183 */
184int
185dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
186{
187	dsl_dataset_user_hold_arg_t dduha;
188	nvpair_t *pair;
189
190	pair = nvlist_next_nvpair(holds, NULL);
191	if (pair == NULL)
192		return (0);
193
194	dduha.dduha_holds = holds;
195	dduha.dduha_errlist = errlist;
196	dduha.dduha_minor = cleanup_minor;
197
198	return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
199	    dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
200}
201
202typedef struct dsl_dataset_user_release_arg {
203	nvlist_t *ddura_holds;
204	nvlist_t *ddura_todelete;
205	nvlist_t *ddura_errlist;
206} dsl_dataset_user_release_arg_t;
207
208static int
209dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
210    nvlist_t *holds, boolean_t *todelete)
211{
212	uint64_t zapobj;
213	nvpair_t *pair;
214	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
215	int error;
216	int numholds = 0;
217
218	*todelete = B_FALSE;
219
220	if (!dsl_dataset_is_snapshot(ds))
221		return (EINVAL);
222
223	zapobj = ds->ds_phys->ds_userrefs_obj;
224	if (zapobj == 0)
225		return (ESRCH);
226
227	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
228	    pair = nvlist_next_nvpair(holds, pair)) {
229		/* Make sure the hold exists */
230		uint64_t tmp;
231		error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
232		if (error == ENOENT)
233			error = ESRCH;
234		if (error != 0)
235			return (error);
236		numholds++;
237	}
238
239	if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
240	    ds->ds_userrefs == numholds) {
241		/* we need to destroy the snapshot as well */
242
243		if (dsl_dataset_long_held(ds))
244			return (EBUSY);
245		*todelete = B_TRUE;
246	}
247	return (0);
248}
249
250static int
251dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
252{
253	dsl_dataset_user_release_arg_t *ddura = arg;
254	dsl_pool_t *dp = dmu_tx_pool(tx);
255	nvpair_t *pair;
256	int rv = 0;
257
258	if (!dmu_tx_is_syncing(tx))
259		return (0);
260
261	for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
262	    pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
263		const char *name = nvpair_name(pair);
264		int error;
265		dsl_dataset_t *ds;
266		nvlist_t *holds;
267
268		error = nvpair_value_nvlist(pair, &holds);
269		if (error != 0)
270			return (EINVAL);
271
272		error = dsl_dataset_hold(dp, name, FTAG, &ds);
273		if (error == 0) {
274			boolean_t deleteme;
275			error = dsl_dataset_user_release_check_one(ds,
276			    holds, &deleteme);
277			if (error == 0 && deleteme) {
278				fnvlist_add_boolean(ddura->ddura_todelete,
279				    name);
280			}
281			dsl_dataset_rele(ds, FTAG);
282		}
283		if (error != 0) {
284			if (ddura->ddura_errlist != NULL) {
285				fnvlist_add_int32(ddura->ddura_errlist,
286				    name, error);
287			}
288			rv = error;
289		}
290	}
291	return (rv);
292}
293
294static void
295dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
296    dmu_tx_t *tx)
297{
298	dsl_pool_t *dp = ds->ds_dir->dd_pool;
299	objset_t *mos = dp->dp_meta_objset;
300	uint64_t zapobj;
301	int error;
302	nvpair_t *pair;
303
304	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
305	    pair = nvlist_next_nvpair(holds, pair)) {
306		ds->ds_userrefs--;
307		error = dsl_pool_user_release(dp, ds->ds_object,
308		    nvpair_name(pair), tx);
309		VERIFY(error == 0 || error == ENOENT);
310		zapobj = ds->ds_phys->ds_userrefs_obj;
311		VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
312
313		spa_history_log_internal_ds(ds, "release", tx,
314		    "tag=%s refs=%lld", nvpair_name(pair),
315		    (longlong_t)ds->ds_userrefs);
316	}
317}
318
319static void
320dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
321{
322	dsl_dataset_user_release_arg_t *ddura = arg;
323	dsl_pool_t *dp = dmu_tx_pool(tx);
324	nvpair_t *pair;
325
326	for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
327	    pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
328		dsl_dataset_t *ds;
329
330		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
331		dsl_dataset_user_release_sync_one(ds,
332		    fnvpair_value_nvlist(pair), tx);
333		if (nvlist_exists(ddura->ddura_todelete,
334		    nvpair_name(pair))) {
335			ASSERT(ds->ds_userrefs == 0 &&
336			    ds->ds_phys->ds_num_children == 1 &&
337			    DS_IS_DEFER_DESTROY(ds));
338			dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
339		}
340		dsl_dataset_rele(ds, FTAG);
341	}
342}
343
344/*
345 * holds is nvl of snapname -> { holdname, ... }
346 * errlist will be filled in with snapname -> error
347 *
348 * if any fails, all will fail.
349 */
350int
351dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
352{
353	dsl_dataset_user_release_arg_t ddura;
354	nvpair_t *pair;
355	int error;
356
357	pair = nvlist_next_nvpair(holds, NULL);
358	if (pair == NULL)
359		return (0);
360
361	ddura.ddura_holds = holds;
362	ddura.ddura_errlist = errlist;
363	ddura.ddura_todelete = fnvlist_alloc();
364
365	error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
366	    dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
367	fnvlist_free(ddura.ddura_todelete);
368	return (error);
369}
370
371typedef struct dsl_dataset_user_release_tmp_arg {
372	uint64_t ddurta_dsobj;
373	nvlist_t *ddurta_holds;
374	boolean_t ddurta_deleteme;
375} dsl_dataset_user_release_tmp_arg_t;
376
377static int
378dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
379{
380	dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
381	dsl_pool_t *dp = dmu_tx_pool(tx);
382	dsl_dataset_t *ds;
383	int error;
384
385	if (!dmu_tx_is_syncing(tx))
386		return (0);
387
388	error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
389	if (error)
390		return (error);
391
392	error = dsl_dataset_user_release_check_one(ds,
393	    ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
394	dsl_dataset_rele(ds, FTAG);
395	return (error);
396}
397
398static void
399dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
400{
401	dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
402	dsl_pool_t *dp = dmu_tx_pool(tx);
403	dsl_dataset_t *ds;
404
405	VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
406	dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
407	if (ddurta->ddurta_deleteme) {
408		ASSERT(ds->ds_userrefs == 0 &&
409		    ds->ds_phys->ds_num_children == 1 &&
410		    DS_IS_DEFER_DESTROY(ds));
411		dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
412	}
413	dsl_dataset_rele(ds, FTAG);
414}
415
416/*
417 * Called at spa_load time to release a stale temporary user hold.
418 * Also called by the onexit code.
419 */
420void
421dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
422{
423	dsl_dataset_user_release_tmp_arg_t ddurta;
424	dsl_dataset_t *ds;
425	int error;
426
427#ifdef _KERNEL
428	/* Make sure it is not mounted. */
429	dsl_pool_config_enter(dp, FTAG);
430	error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
431	if (error == 0) {
432		char name[MAXNAMELEN];
433		dsl_dataset_name(ds, name);
434		dsl_dataset_rele(ds, FTAG);
435		dsl_pool_config_exit(dp, FTAG);
436		zfs_unmount_snap(name);
437	} else {
438		dsl_pool_config_exit(dp, FTAG);
439	}
440#endif
441
442	ddurta.ddurta_dsobj = dsobj;
443	ddurta.ddurta_holds = fnvlist_alloc();
444	fnvlist_add_boolean(ddurta.ddurta_holds, htag);
445
446	(void) dsl_sync_task(spa_name(dp->dp_spa),
447	    dsl_dataset_user_release_tmp_check,
448	    dsl_dataset_user_release_tmp_sync, &ddurta, 1);
449	fnvlist_free(ddurta.ddurta_holds);
450}
451
452typedef struct zfs_hold_cleanup_arg {
453	char zhca_spaname[MAXNAMELEN];
454	uint64_t zhca_spa_load_guid;
455	uint64_t zhca_dsobj;
456	char zhca_htag[MAXNAMELEN];
457} zfs_hold_cleanup_arg_t;
458
459static void
460dsl_dataset_user_release_onexit(void *arg)
461{
462	zfs_hold_cleanup_arg_t *ca = arg;
463	spa_t *spa;
464	int error;
465
466	error = spa_open(ca->zhca_spaname, &spa, FTAG);
467	if (error != 0) {
468		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
469		    "because pool is no longer loaded",
470		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
471		return;
472	}
473	if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
474		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
475		    "because pool is no longer loaded (guid doesn't match)",
476		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
477		spa_close(spa, FTAG);
478		return;
479	}
480
481	dsl_dataset_user_release_tmp(spa_get_dsl(spa),
482	    ca->zhca_dsobj, ca->zhca_htag);
483	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
484	spa_close(spa, FTAG);
485}
486
487void
488dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
489    minor_t minor)
490{
491	zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
492	spa_t *spa = dsl_dataset_get_spa(ds);
493	(void) strlcpy(ca->zhca_spaname, spa_name(spa),
494	    sizeof (ca->zhca_spaname));
495	ca->zhca_spa_load_guid = spa_load_guid(spa);
496	ca->zhca_dsobj = ds->ds_object;
497	(void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
498	VERIFY0(zfs_onexit_add_cb(minor,
499	    dsl_dataset_user_release_onexit, ca, NULL));
500}
501
502int
503dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
504{
505	dsl_pool_t *dp;
506	dsl_dataset_t *ds;
507	int err;
508
509	err = dsl_pool_hold(dsname, FTAG, &dp);
510	if (err != 0)
511		return (err);
512	err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
513	if (err != 0) {
514		dsl_pool_rele(dp, FTAG);
515		return (err);
516	}
517
518	if (ds->ds_phys->ds_userrefs_obj != 0) {
519		zap_attribute_t *za;
520		zap_cursor_t zc;
521
522		za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
523		for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
524		    ds->ds_phys->ds_userrefs_obj);
525		    zap_cursor_retrieve(&zc, za) == 0;
526		    zap_cursor_advance(&zc)) {
527			fnvlist_add_uint64(nvl, za->za_name,
528			    za->za_first_integer);
529		}
530		zap_cursor_fini(&zc);
531		kmem_free(za, sizeof (zap_attribute_t));
532	}
533	dsl_dataset_rele(ds, FTAG);
534	dsl_pool_rele(dp, FTAG);
535	return (0);
536}
537