1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org> 24 * Copyright (c) 2013, 2014 by Delphix. All rights reserved. 25 * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26 * Copyright (c) 2014 RackTop Systems. 
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>

/*
 * Swap the values of two uint64_t lvalues through a temporary.
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
#define	SWITCH64(x, y) \
	{ \
		uint64_t __tmp = (x); \
		(x) = (y); \
		(y) = __tmp; \
	}

/* NOTE(review): presumably an upper bound on dataset references -- confirm. */
#define	DS_REF_MAX	(1ULL << 62)

/* Deadlist block size; simply an alias for SPA_MAXBLOCKSIZE. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Figure out how much of this delta should be propagated to the dsl_dir
 * layer. If there's a refreservation, that space has already been
 * partially accounted for in our ancestors.
69 */ 70static int64_t 71parent_delta(dsl_dataset_t *ds, int64_t delta) 72{ 73 uint64_t old_bytes, new_bytes; 74 75 if (ds->ds_reserved == 0) 76 return (delta); 77 78 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 79 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 80 81 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 82 return (new_bytes - old_bytes); 83} 84 85void 86dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 87{ 88 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 89 int compressed = BP_GET_PSIZE(bp); 90 int uncompressed = BP_GET_UCSIZE(bp); 91 int64_t delta; 92 93 dprintf_bp(bp, "ds=%p", ds); 94 95 ASSERT(dmu_tx_is_syncing(tx)); 96 /* It could have been compressed away to nothing */ 97 if (BP_IS_HOLE(bp)) 98 return; 99 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 100 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 101 if (ds == NULL) { 102 dsl_pool_mos_diduse_space(tx->tx_pool, 103 used, compressed, uncompressed); 104 return; 105 } 106 107 dmu_buf_will_dirty(ds->ds_dbuf, tx); 108 mutex_enter(&ds->ds_lock); 109 delta = parent_delta(ds, used); 110 ds->ds_phys->ds_referenced_bytes += used; 111 ds->ds_phys->ds_compressed_bytes += compressed; 112 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 113 ds->ds_phys->ds_unique_bytes += used; 114 mutex_exit(&ds->ds_lock); 115 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 116 compressed, uncompressed, tx); 117 dsl_dir_transfer_space(ds->ds_dir, used - delta, 118 DD_USED_REFRSRV, DD_USED_HEAD, tx); 119} 120 121int 122dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 123 boolean_t async) 124{ 125 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 126 int compressed = BP_GET_PSIZE(bp); 127 int uncompressed = BP_GET_UCSIZE(bp); 128 129 if (BP_IS_HOLE(bp)) 130 return (0); 131 132 ASSERT(dmu_tx_is_syncing(tx)); 133 ASSERT(bp->blk_birth <= tx->tx_txg); 134 135 if (ds == NULL) { 136 dsl_free(tx->tx_pool, 
tx->tx_txg, bp); 137 dsl_pool_mos_diduse_space(tx->tx_pool, 138 -used, -compressed, -uncompressed); 139 return (used); 140 } 141 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 142 143 ASSERT(!dsl_dataset_is_snapshot(ds)); 144 dmu_buf_will_dirty(ds->ds_dbuf, tx); 145 146 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 147 int64_t delta; 148 149 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 150 dsl_free(tx->tx_pool, tx->tx_txg, bp); 151 152 mutex_enter(&ds->ds_lock); 153 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 154 !DS_UNIQUE_IS_ACCURATE(ds)); 155 delta = parent_delta(ds, -used); 156 ds->ds_phys->ds_unique_bytes -= used; 157 mutex_exit(&ds->ds_lock); 158 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 159 delta, -compressed, -uncompressed, tx); 160 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 161 DD_USED_REFRSRV, DD_USED_HEAD, tx); 162 } else { 163 dprintf_bp(bp, "putting on dead list: %s", ""); 164 if (async) { 165 /* 166 * We are here as part of zio's write done callback, 167 * which means we're a zio interrupt thread. We can't 168 * call dsl_deadlist_insert() now because it may block 169 * waiting for I/O. Instead, put bp on the deferred 170 * queue and let dsl_pool_sync() finish the job. 
171 */ 172 bplist_append(&ds->ds_pending_deadlist, bp); 173 } else { 174 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 175 } 176 ASSERT3U(ds->ds_prev->ds_object, ==, 177 ds->ds_phys->ds_prev_snap_obj); 178 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 179 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 180 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 181 ds->ds_object && bp->blk_birth > 182 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 183 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 184 mutex_enter(&ds->ds_prev->ds_lock); 185 ds->ds_prev->ds_phys->ds_unique_bytes += used; 186 mutex_exit(&ds->ds_prev->ds_lock); 187 } 188 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 189 dsl_dir_transfer_space(ds->ds_dir, used, 190 DD_USED_HEAD, DD_USED_SNAP, tx); 191 } 192 } 193 mutex_enter(&ds->ds_lock); 194 ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used); 195 ds->ds_phys->ds_referenced_bytes -= used; 196 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 197 ds->ds_phys->ds_compressed_bytes -= compressed; 198 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 199 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 200 mutex_exit(&ds->ds_lock); 201 202 return (used); 203} 204 205uint64_t 206dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 207{ 208 uint64_t trysnap = 0; 209 210 if (ds == NULL) 211 return (0); 212 /* 213 * The snapshot creation could fail, but that would cause an 214 * incorrect FALSE return, which would only result in an 215 * overestimation of the amount of space that an operation would 216 * consume, which is OK. 217 * 218 * There's also a small window where we could miss a pending 219 * snapshot, because we could set the sync task in the quiescing 220 * phase. So this should only be used as a guess. 
221 */ 222 if (ds->ds_trysnap_txg > 223 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 224 trysnap = ds->ds_trysnap_txg; 225 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 226} 227 228boolean_t 229dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 230 uint64_t blk_birth) 231{ 232 if (blk_birth <= dsl_dataset_prev_snap_txg(ds) || 233 (bp != NULL && BP_IS_HOLE(bp))) 234 return (B_FALSE); 235 236 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 237 238 return (B_TRUE); 239} 240 241/* ARGSUSED */ 242static void 243dsl_dataset_evict(dmu_buf_t *db, void *dsv) 244{ 245 dsl_dataset_t *ds = dsv; 246 247 ASSERT(ds->ds_owner == NULL); 248 249 unique_remove(ds->ds_fsid_guid); 250 251 if (ds->ds_objset != NULL) 252 dmu_objset_evict(ds->ds_objset); 253 254 if (ds->ds_prev) { 255 dsl_dataset_rele(ds->ds_prev, ds); 256 ds->ds_prev = NULL; 257 } 258 259 bplist_destroy(&ds->ds_pending_deadlist); 260 if (ds->ds_phys->ds_deadlist_obj != 0) 261 dsl_deadlist_close(&ds->ds_deadlist); 262 if (ds->ds_dir) 263 dsl_dir_rele(ds->ds_dir, ds); 264 265 ASSERT(!list_link_active(&ds->ds_synced_link)); 266 267 if (mutex_owned(&ds->ds_lock)) 268 mutex_exit(&ds->ds_lock); 269 mutex_destroy(&ds->ds_lock); 270 if (mutex_owned(&ds->ds_opening_lock)) 271 mutex_exit(&ds->ds_opening_lock); 272 mutex_destroy(&ds->ds_opening_lock); 273 mutex_destroy(&ds->ds_sendstream_lock); 274 refcount_destroy(&ds->ds_longholds); 275 276 kmem_free(ds, sizeof (dsl_dataset_t)); 277} 278 279int 280dsl_dataset_get_snapname(dsl_dataset_t *ds) 281{ 282 dsl_dataset_phys_t *headphys; 283 int err; 284 dmu_buf_t *headdbuf; 285 dsl_pool_t *dp = ds->ds_dir->dd_pool; 286 objset_t *mos = dp->dp_meta_objset; 287 288 if (ds->ds_snapname[0]) 289 return (0); 290 if (ds->ds_phys->ds_next_snap_obj == 0) 291 return (0); 292 293 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 294 FTAG, &headdbuf); 295 if (err != 0) 296 return (err); 297 headphys = headdbuf->db_data; 298 err = 
zap_value_search(dp->dp_meta_objset, 299 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 300 dmu_buf_rele(headdbuf, FTAG); 301 return (err); 302} 303 304int 305dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 306{ 307 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 308 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 309 matchtype_t mt; 310 int err; 311 312 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 313 mt = MT_FIRST; 314 else 315 mt = MT_EXACT; 316 317 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 318 value, mt, NULL, 0, NULL); 319 if (err == ENOTSUP && mt == MT_FIRST) 320 err = zap_lookup(mos, snapobj, name, 8, 1, value); 321 return (err); 322} 323 324int 325dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, 326 boolean_t adj_cnt) 327{ 328 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 329 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 330 matchtype_t mt; 331 int err; 332 333 dsl_dir_snap_cmtime_update(ds->ds_dir); 334 335 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 336 mt = MT_FIRST; 337 else 338 mt = MT_EXACT; 339 340 err = zap_remove_norm(mos, snapobj, name, mt, tx); 341 if (err == ENOTSUP && mt == MT_FIRST) 342 err = zap_remove(mos, snapobj, name, tx); 343 344 if (err == 0 && adj_cnt) 345 dsl_fs_ss_count_adjust(ds->ds_dir, -1, 346 DD_FIELD_SNAPSHOT_COUNT, tx); 347 348 return (err); 349} 350 351int 352dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 353 dsl_dataset_t **dsp) 354{ 355 objset_t *mos = dp->dp_meta_objset; 356 dmu_buf_t *dbuf; 357 dsl_dataset_t *ds; 358 int err; 359 dmu_object_info_t doi; 360 361 ASSERT(dsl_pool_config_held(dp)); 362 363 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 364 if (err != 0) 365 return (err); 366 367 /* Make sure dsobj has the correct object type. 
*/ 368 dmu_object_info_from_db(dbuf, &doi); 369 if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) { 370 dmu_buf_rele(dbuf, tag); 371 return (SET_ERROR(EINVAL)); 372 } 373 374 ds = dmu_buf_get_user(dbuf); 375 if (ds == NULL) { 376 dsl_dataset_t *winner = NULL; 377 378 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 379 ds->ds_dbuf = dbuf; 380 ds->ds_object = dsobj; 381 ds->ds_phys = dbuf->db_data; 382 383 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 384 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 385 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 386 refcount_create(&ds->ds_longholds); 387 388 bplist_create(&ds->ds_pending_deadlist); 389 dsl_deadlist_open(&ds->ds_deadlist, 390 mos, ds->ds_phys->ds_deadlist_obj); 391 392 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 393 offsetof(dmu_sendarg_t, dsa_link)); 394 395 if (err == 0) { 396 err = dsl_dir_hold_obj(dp, 397 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 398 } 399 if (err != 0) { 400 mutex_destroy(&ds->ds_lock); 401 mutex_destroy(&ds->ds_opening_lock); 402 mutex_destroy(&ds->ds_sendstream_lock); 403 refcount_destroy(&ds->ds_longholds); 404 bplist_destroy(&ds->ds_pending_deadlist); 405 dsl_deadlist_close(&ds->ds_deadlist); 406 kmem_free(ds, sizeof (dsl_dataset_t)); 407 dmu_buf_rele(dbuf, tag); 408 return (err); 409 } 410 411 if (!dsl_dataset_is_snapshot(ds)) { 412 ds->ds_snapname[0] = '\0'; 413 if (ds->ds_phys->ds_prev_snap_obj != 0) { 414 err = dsl_dataset_hold_obj(dp, 415 ds->ds_phys->ds_prev_snap_obj, 416 ds, &ds->ds_prev); 417 } 418 if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 419 int zaperr = zap_lookup(mos, ds->ds_object, 420 DS_FIELD_BOOKMARK_NAMES, 421 sizeof (ds->ds_bookmarks), 1, 422 &ds->ds_bookmarks); 423 if (zaperr != ENOENT) 424 VERIFY0(zaperr); 425 } 426 } else { 427 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 428 err = dsl_dataset_get_snapname(ds); 429 if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 430 err = zap_count( 431 
ds->ds_dir->dd_pool->dp_meta_objset, 432 ds->ds_phys->ds_userrefs_obj, 433 &ds->ds_userrefs); 434 } 435 } 436 437 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 438 err = dsl_prop_get_int_ds(ds, 439 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 440 &ds->ds_reserved); 441 if (err == 0) { 442 err = dsl_prop_get_int_ds(ds, 443 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 444 &ds->ds_quota); 445 } 446 } else { 447 ds->ds_reserved = ds->ds_quota = 0; 448 } 449 450 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, 451 &ds->ds_phys, dsl_dataset_evict)) != NULL) { 452 bplist_destroy(&ds->ds_pending_deadlist); 453 dsl_deadlist_close(&ds->ds_deadlist); 454 if (ds->ds_prev) 455 dsl_dataset_rele(ds->ds_prev, ds); 456 dsl_dir_rele(ds->ds_dir, ds); 457 mutex_destroy(&ds->ds_lock); 458 mutex_destroy(&ds->ds_opening_lock); 459 mutex_destroy(&ds->ds_sendstream_lock); 460 refcount_destroy(&ds->ds_longholds); 461 kmem_free(ds, sizeof (dsl_dataset_t)); 462 if (err != 0) { 463 dmu_buf_rele(dbuf, tag); 464 return (err); 465 } 466 ds = winner; 467 } else { 468 ds->ds_fsid_guid = 469 unique_insert(ds->ds_phys->ds_fsid_guid); 470 } 471 } 472 ASSERT3P(ds->ds_dbuf, ==, dbuf); 473 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 474 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 475 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 476 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 477 *dsp = ds; 478 return (0); 479} 480 481int 482dsl_dataset_hold(dsl_pool_t *dp, const char *name, 483 void *tag, dsl_dataset_t **dsp) 484{ 485 dsl_dir_t *dd; 486 const char *snapname; 487 uint64_t obj; 488 int err = 0; 489 490 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 491 if (err != 0) 492 return (err); 493 494 ASSERT(dsl_pool_config_held(dp)); 495 obj = dd->dd_phys->dd_head_dataset_obj; 496 if (obj != 0) 497 err = dsl_dataset_hold_obj(dp, obj, tag, dsp); 498 else 499 err = SET_ERROR(ENOENT); 500 501 /* we may be looking for a snapshot */ 502 if (err == 0 && snapname != NULL) { 503 dsl_dataset_t *ds; 504 
505 if (*snapname++ != '@') { 506 dsl_dataset_rele(*dsp, tag); 507 dsl_dir_rele(dd, FTAG); 508 return (SET_ERROR(ENOENT)); 509 } 510 511 dprintf("looking for snapshot '%s'\n", snapname); 512 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 513 if (err == 0) 514 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 515 dsl_dataset_rele(*dsp, tag); 516 517 if (err == 0) { 518 mutex_enter(&ds->ds_lock); 519 if (ds->ds_snapname[0] == 0) 520 (void) strlcpy(ds->ds_snapname, snapname, 521 sizeof (ds->ds_snapname)); 522 mutex_exit(&ds->ds_lock); 523 *dsp = ds; 524 } 525 } 526 527 dsl_dir_rele(dd, FTAG); 528 return (err); 529} 530 531int 532dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 533 void *tag, dsl_dataset_t **dsp) 534{ 535 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 536 if (err != 0) 537 return (err); 538 if (!dsl_dataset_tryown(*dsp, tag)) { 539 dsl_dataset_rele(*dsp, tag); 540 *dsp = NULL; 541 return (SET_ERROR(EBUSY)); 542 } 543 return (0); 544} 545 546int 547dsl_dataset_own(dsl_pool_t *dp, const char *name, 548 void *tag, dsl_dataset_t **dsp) 549{ 550 int err = dsl_dataset_hold(dp, name, tag, dsp); 551 if (err != 0) 552 return (err); 553 if (!dsl_dataset_tryown(*dsp, tag)) { 554 dsl_dataset_rele(*dsp, tag); 555 return (SET_ERROR(EBUSY)); 556 } 557 return (0); 558} 559 560/* 561 * See the comment above dsl_pool_hold() for details. In summary, a long 562 * hold is used to prevent destruction of a dataset while the pool hold 563 * is dropped, allowing other concurrent operations (e.g. spa_sync()). 564 * 565 * The dataset and pool must be held when this function is called. After it 566 * is called, the pool hold may be released while the dataset is still held 567 * and accessed. 
568 */ 569void 570dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 571{ 572 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 573 (void) refcount_add(&ds->ds_longholds, tag); 574} 575 576void 577dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 578{ 579 (void) refcount_remove(&ds->ds_longholds, tag); 580} 581 582/* Return B_TRUE if there are any long holds on this dataset. */ 583boolean_t 584dsl_dataset_long_held(dsl_dataset_t *ds) 585{ 586 return (!refcount_is_zero(&ds->ds_longholds)); 587} 588 589void 590dsl_dataset_name(dsl_dataset_t *ds, char *name) 591{ 592 if (ds == NULL) { 593 (void) strcpy(name, "mos"); 594 } else { 595 dsl_dir_name(ds->ds_dir, name); 596 VERIFY0(dsl_dataset_get_snapname(ds)); 597 if (ds->ds_snapname[0]) { 598 (void) strcat(name, "@"); 599 /* 600 * We use a "recursive" mutex so that we 601 * can call dprintf_ds() with ds_lock held. 602 */ 603 if (!MUTEX_HELD(&ds->ds_lock)) { 604 mutex_enter(&ds->ds_lock); 605 (void) strcat(name, ds->ds_snapname); 606 mutex_exit(&ds->ds_lock); 607 } else { 608 (void) strcat(name, ds->ds_snapname); 609 } 610 } 611 } 612} 613 614void 615dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 616{ 617 dmu_buf_rele(ds->ds_dbuf, tag); 618} 619 620void 621dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 622{ 623 ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL); 624 625 mutex_enter(&ds->ds_lock); 626 ds->ds_owner = NULL; 627 mutex_exit(&ds->ds_lock); 628 dsl_dataset_long_rele(ds, tag); 629 if (ds->ds_dbuf != NULL) 630 dsl_dataset_rele(ds, tag); 631 else 632 dsl_dataset_evict(NULL, ds); 633} 634 635boolean_t 636dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 637{ 638 boolean_t gotit = FALSE; 639 640 mutex_enter(&ds->ds_lock); 641 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 642 ds->ds_owner = tag; 643 dsl_dataset_long_hold(ds, tag); 644 gotit = TRUE; 645 } 646 mutex_exit(&ds->ds_lock); 647 return (gotit); 648} 649 650uint64_t 651dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 652 uint64_t 
flags, dmu_tx_t *tx) 653{ 654 dsl_pool_t *dp = dd->dd_pool; 655 dmu_buf_t *dbuf; 656 dsl_dataset_phys_t *dsphys; 657 uint64_t dsobj; 658 objset_t *mos = dp->dp_meta_objset; 659 660 if (origin == NULL) 661 origin = dp->dp_origin_snap; 662 663 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 664 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 665 ASSERT(dmu_tx_is_syncing(tx)); 666 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 667 668 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 669 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 670 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 671 dmu_buf_will_dirty(dbuf, tx); 672 dsphys = dbuf->db_data; 673 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 674 dsphys->ds_dir_obj = dd->dd_object; 675 dsphys->ds_flags = flags; 676 dsphys->ds_fsid_guid = unique_create(); 677 do { 678 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 679 sizeof (dsphys->ds_guid)); 680 } while (dsphys->ds_guid == 0); 681 dsphys->ds_snapnames_zapobj = 682 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 683 DMU_OT_NONE, 0, tx); 684 dsphys->ds_creation_time = gethrestime_sec(); 685 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; 686 687 if (origin == NULL) { 688 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 689 } else { 690 dsl_dataset_t *ohds; /* head of the origin snapshot */ 691 692 dsphys->ds_prev_snap_obj = origin->ds_object; 693 dsphys->ds_prev_snap_txg = 694 origin->ds_phys->ds_creation_txg; 695 dsphys->ds_referenced_bytes = 696 origin->ds_phys->ds_referenced_bytes; 697 dsphys->ds_compressed_bytes = 698 origin->ds_phys->ds_compressed_bytes; 699 dsphys->ds_uncompressed_bytes = 700 origin->ds_phys->ds_uncompressed_bytes; 701 dsphys->ds_bp = origin->ds_phys->ds_bp; 702 dsphys->ds_flags |= origin->ds_phys->ds_flags; 703 704 dmu_buf_will_dirty(origin->ds_dbuf, tx); 705 origin->ds_phys->ds_num_children++; 706 707 VERIFY0(dsl_dataset_hold_obj(dp, 708 origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); 709 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 710 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 711 dsl_dataset_rele(ohds, FTAG); 712 713 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 714 if (origin->ds_phys->ds_next_clones_obj == 0) { 715 origin->ds_phys->ds_next_clones_obj = 716 zap_create(mos, 717 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 718 } 719 VERIFY0(zap_add_int(mos, 720 origin->ds_phys->ds_next_clones_obj, dsobj, tx)); 721 } 722 723 dmu_buf_will_dirty(dd->dd_dbuf, tx); 724 dd->dd_phys->dd_origin_obj = origin->ds_object; 725 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 726 if (origin->ds_dir->dd_phys->dd_clones == 0) { 727 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 728 origin->ds_dir->dd_phys->dd_clones = 729 zap_create(mos, 730 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 731 } 732 VERIFY0(zap_add_int(mos, 733 origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); 734 } 735 } 736 737 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 738 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 739 740 dmu_buf_rele(dbuf, FTAG); 741 742 dmu_buf_will_dirty(dd->dd_dbuf, tx); 743 
dd->dd_phys->dd_head_dataset_obj = dsobj; 744 745 return (dsobj); 746} 747 748static void 749dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 750{ 751 objset_t *os; 752 753 VERIFY0(dmu_objset_from_ds(ds, &os)); 754 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 755 dsl_dataset_dirty(ds, tx); 756} 757 758uint64_t 759dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 760 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 761{ 762 dsl_pool_t *dp = pdd->dd_pool; 763 uint64_t dsobj, ddobj; 764 dsl_dir_t *dd; 765 766 ASSERT(dmu_tx_is_syncing(tx)); 767 ASSERT(lastname[0] != '@'); 768 769 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 770 VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 771 772 dsobj = dsl_dataset_create_sync_dd(dd, origin, 773 flags & ~DS_CREATE_FLAG_NODIRTY, tx); 774 775 dsl_deleg_set_create_perms(dd, tx, cr); 776 777 /* 778 * Since we're creating a new node we know it's a leaf, so we can 779 * initialize the counts if the limit feature is active. 780 */ 781 if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 782 uint64_t cnt = 0; 783 objset_t *os = dd->dd_pool->dp_meta_objset; 784 785 dsl_dir_zapify(dd, tx); 786 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 787 sizeof (cnt), 1, &cnt, tx)); 788 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 789 sizeof (cnt), 1, &cnt, tx)); 790 } 791 792 dsl_dir_rele(dd, FTAG); 793 794 /* 795 * If we are creating a clone, make sure we zero out any stale 796 * data from the origin snapshots zil header. 
797 */ 798 if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 799 dsl_dataset_t *ds; 800 801 VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 802 dsl_dataset_zero_zil(ds, tx); 803 dsl_dataset_rele(ds, FTAG); 804 } 805 806 return (dsobj); 807} 808 809#ifdef __FreeBSD__ 810/* FreeBSD ioctl compat begin */ 811struct destroyarg { 812 nvlist_t *nvl; 813 const char *snapname; 814}; 815 816static int 817dsl_check_snap_cb(const char *name, void *arg) 818{ 819 struct destroyarg *da = arg; 820 dsl_dataset_t *ds; 821 char *dsname; 822 823 dsname = kmem_asprintf("%s@%s", name, da->snapname); 824 fnvlist_add_boolean(da->nvl, dsname); 825 kmem_free(dsname, strlen(dsname) + 1); 826 827 return (0); 828} 829 830int 831dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, 832 nvlist_t *snaps) 833{ 834 struct destroyarg *da; 835 int err; 836 837 da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); 838 da->nvl = snaps; 839 da->snapname = snapname; 840 err = dmu_objset_find(fsname, dsl_check_snap_cb, da, 841 DS_FIND_CHILDREN); 842 kmem_free(da, sizeof (struct destroyarg)); 843 844 return (err); 845} 846/* FreeBSD ioctl compat end */ 847#endif /* __FreeBSD__ */ 848 849/* 850 * The unique space in the head dataset can be calculated by subtracting 851 * the space used in the most recent snapshot, that is still being used 852 * in this file system, from the space currently in use. To figure out 853 * the space in the most recent snapshot still in use, we need to take 854 * the total space used in the snapshot and subtract out the space that 855 * has been freed up since the snapshot was taken. 
 */
void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(!dsl_dataset_is_snapshot(ds));

	/* Space referenced by the most recent snapshot, if there is one. */
	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
	else
		mrs_used = 0;

	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

/*
 * Remove obj from ds's next-clones ZAP.  A missing entry (ENOENT) is
 * tolerated; any other error is fatal.
 */
void
dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
    dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	ASSERT(ds->ds_phys->ds_num_children >= 2);
	err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
	/*
	 * The err should not be ENOENT, but a bug in a previous version
	 * of the code could cause upgrade_clones_cb() to not set
	 * ds_next_snap_obj when it should, leading to a missing entry.
	 * If we knew that the pool was created after
	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
	 * ENOENT. However, at least we can check that we don't have
	 * too many entries in the next_clones_obj even after failing to
	 * remove this one.
	 */
	if (err != ENOENT)
		VERIFY0(err);
	ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
	    &count));
	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
}


/* Return a pointer to the block pointer stored in the dataset's phys. */
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

/*
 * Store *bp as the dataset's block pointer.  Must be called in syncing
 * context.  With ds == NULL the pool's dp_meta_rootbp is set instead.
 */
void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

/* Return the spa_t of the pool that ds belongs to; ds must not be NULL. */
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

/*
 * Put ds on the pool's dirty-dataset list for tx's txg.  The first time it
 * is added for that txg an extra reference is taken on its dbuf.  Does
 * nothing for ds == NULL; panics if ds has a next snapshot object.
 */
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/*
 * Return B_TRUE if ds is on the pool's dirty-dataset list for any of the
 * TXG_SIZE txg slots.
 */
boolean_t
dsl_dataset_is_dirty(dsl_dataset_t *ds)
{
	for (int t = 0; t < TXG_SIZE; t++) {
		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
		    ds, t))
			return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Check that there is room for the space a snapshot of ds would take out
 * of its refreservation, and reserve it for the rest of this sync group.
 * Returns 0 in open context or when there is enough space, else ENOSPC.
 */
static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
	asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
		return (SET_ERROR(ENOSPC));

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

/* Argument bundle for the snapshot check function below. */
typedef struct dsl_dataset_snapshot_arg {
	nvlist_t *ddsa_snaps;	/* nvpair names are "<dataset>@<snapname>" */
	nvlist_t *ddsa_props;
	nvlist_t *ddsa_errors;	/* optional; receives per-name error codes */
	cred_t *ddsa_cr;	/* passed to dsl_fs_ss_limit_check() */
} dsl_dataset_snapshot_arg_t;

/*
 * Check whether snapshot "snapname" of ds may be created in tx.
 *
 * Always records tx's txg in ds_trysnap_txg; the remaining checks run only
 * in syncing context.  Returns 0 if the snapshot may be created, otherwise:
 *   EAGAIN - ds already has a snapshot in this (or a later) txg
 *   EEXIST - a snapshot with this name already exists
 *   EBUSY  - ds is inconsistent and this is not part of a receive
 * or an error from the name lookup, the snapshot limit check (done only
 * when cnt != 0 and cr != NULL) or the space reservation.
 */
int
dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
{
	int error;
	uint64_t value;

	ds->ds_trysnap_txg = tx->tx_txg;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * We don't allow multiple snapshots of the same txg. If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (SET_ERROR(EAGAIN));

	/*
	 * Check for conflicting snapshot name.
	 */
	error = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (error == 0)
		return (SET_ERROR(EEXIST));
	if (error != ENOENT)
		return (error);

	/*
	 * We don't allow taking snapshots of inconsistent datasets, such as
	 * those into which we are currently receiving. However, if we are
	 * creating this snapshot as part of a receive, this check will be
	 * executed atomically with respect to the completion of the receive
	 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this
	 * case we ignore this, knowing it will be fixed up for us shortly in
	 * dmu_recv_end_sync().
	 */
	if (!recv && DS_IS_INCONSISTENT(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Skip the check for temporary snapshots or if we have already checked
	 * the counts in dsl_dataset_snapshot_check. This means we really only
	 * check the count here when we're receiving a stream.
	 */
	if (cnt != 0 && cr != NULL) {
		error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
		if (error != 0)
			return (error);
	}

	error = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (error != 0)
		return (error);

	return (0);
}

static int
dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	/*
	 * Pre-compute how many total new snapshots will be created for each
	 * level in the tree and below. This is needed for validating the
	 * snapshot limit when either taking a recursive snapshot or when
	 * taking multiple snapshots.
	 *
	 * The problem is that the counts are not actually adjusted when
	 * we are checking, only when we finally sync. For a single snapshot,
	 * this is easy, the count will increase by 1 at each node up the tree,
	 * but its more complicated for the recursive/multiple snapshot case.
	 *
	 * The dsl_fs_ss_limit_check function does recursively check the count
	 * at each level up the tree but since it is validating each snapshot
	 * independently we need to be sure that we are validating the complete
	 * count for the entire set of snapshots. We do this by rolling up the
	 * counts for each component of the name into an nvlist and then
	 * checking each of those cases with the aggregated count.
1085 * 1086 * This approach properly handles not only the recursive snapshot 1087 * case (where we get all of those on the ddsa_snaps list) but also 1088 * the sibling case (e.g. snapshot a/b and a/c so that we will also 1089 * validate the limit on 'a' using a count of 2). 1090 * 1091 * We validate the snapshot names in the third loop and only report 1092 * name errors once. 1093 */ 1094 if (dmu_tx_is_syncing(tx)) { 1095 nvlist_t *cnt_track = NULL; 1096 cnt_track = fnvlist_alloc(); 1097 1098 /* Rollup aggregated counts into the cnt_track list */ 1099 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1100 pair != NULL; 1101 pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1102 char *pdelim; 1103 uint64_t val; 1104 char nm[MAXPATHLEN]; 1105 1106 (void) strlcpy(nm, nvpair_name(pair), sizeof (nm)); 1107 pdelim = strchr(nm, '@'); 1108 if (pdelim == NULL) 1109 continue; 1110 *pdelim = '\0'; 1111 1112 do { 1113 if (nvlist_lookup_uint64(cnt_track, nm, 1114 &val) == 0) { 1115 /* update existing entry */ 1116 fnvlist_add_uint64(cnt_track, nm, 1117 val + 1); 1118 } else { 1119 /* add to list */ 1120 fnvlist_add_uint64(cnt_track, nm, 1); 1121 } 1122 1123 pdelim = strrchr(nm, '/'); 1124 if (pdelim != NULL) 1125 *pdelim = '\0'; 1126 } while (pdelim != NULL); 1127 } 1128 1129 /* Check aggregated counts at each level */ 1130 for (pair = nvlist_next_nvpair(cnt_track, NULL); 1131 pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) { 1132 int error = 0; 1133 char *name; 1134 uint64_t cnt = 0; 1135 dsl_dataset_t *ds; 1136 1137 name = nvpair_name(pair); 1138 cnt = fnvpair_value_uint64(pair); 1139 ASSERT(cnt > 0); 1140 1141 error = dsl_dataset_hold(dp, name, FTAG, &ds); 1142 if (error == 0) { 1143 error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1144 ZFS_PROP_SNAPSHOT_LIMIT, NULL, 1145 ddsa->ddsa_cr); 1146 dsl_dataset_rele(ds, FTAG); 1147 } 1148 1149 if (error != 0) { 1150 if (ddsa->ddsa_errors != NULL) 1151 fnvlist_add_int32(ddsa->ddsa_errors, 1152 name, error); 1153 rv 
= error; 1154 /* only report one error for this check */ 1155 break; 1156 } 1157 } 1158 nvlist_free(cnt_track); 1159 } 1160 1161 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1162 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1163 int error = 0; 1164 dsl_dataset_t *ds; 1165 char *name, *atp; 1166 char dsname[MAXNAMELEN]; 1167 1168 name = nvpair_name(pair); 1169 if (strlen(name) >= MAXNAMELEN) 1170 error = SET_ERROR(ENAMETOOLONG); 1171 if (error == 0) { 1172 atp = strchr(name, '@'); 1173 if (atp == NULL) 1174 error = SET_ERROR(EINVAL); 1175 if (error == 0) 1176 (void) strlcpy(dsname, name, atp - name + 1); 1177 } 1178 if (error == 0) 1179 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 1180 if (error == 0) { 1181 /* passing 0/NULL skips dsl_fs_ss_limit_check */ 1182 error = dsl_dataset_snapshot_check_impl(ds, 1183 atp + 1, tx, B_FALSE, 0, NULL); 1184 dsl_dataset_rele(ds, FTAG); 1185 } 1186 1187 if (error != 0) { 1188 if (ddsa->ddsa_errors != NULL) { 1189 fnvlist_add_int32(ddsa->ddsa_errors, 1190 name, error); 1191 } 1192 rv = error; 1193 } 1194 } 1195 1196 return (rv); 1197} 1198 1199void 1200dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, 1201 dmu_tx_t *tx) 1202{ 1203 static zil_header_t zero_zil; 1204 1205 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1206 dmu_buf_t *dbuf; 1207 dsl_dataset_phys_t *dsphys; 1208 uint64_t dsobj, crtxg; 1209 objset_t *mos = dp->dp_meta_objset; 1210 objset_t *os; 1211 1212 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1213 1214 /* 1215 * If we are on an old pool, the zil must not be active, in which 1216 * case it will be zeroed. Usually zil_suspend() accomplishes this. 
1217 */ 1218 ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || 1219 dmu_objset_from_ds(ds, &os) != 0 || 1220 bcmp(&os->os_phys->os_zil_header, &zero_zil, 1221 sizeof (zero_zil)) == 0); 1222 1223 dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); 1224 1225 /* 1226 * The origin's ds_creation_txg has to be < TXG_INITIAL 1227 */ 1228 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1229 crtxg = 1; 1230 else 1231 crtxg = tx->tx_txg; 1232 1233 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1234 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1235 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1236 dmu_buf_will_dirty(dbuf, tx); 1237 dsphys = dbuf->db_data; 1238 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1239 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1240 dsphys->ds_fsid_guid = unique_create(); 1241 do { 1242 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1243 sizeof (dsphys->ds_guid)); 1244 } while (dsphys->ds_guid == 0); 1245 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1246 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1247 dsphys->ds_next_snap_obj = ds->ds_object; 1248 dsphys->ds_num_children = 1; 1249 dsphys->ds_creation_time = gethrestime_sec(); 1250 dsphys->ds_creation_txg = crtxg; 1251 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1252 dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes; 1253 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1254 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1255 dsphys->ds_flags = ds->ds_phys->ds_flags; 1256 dsphys->ds_bp = ds->ds_phys->ds_bp; 1257 dmu_buf_rele(dbuf, FTAG); 1258 1259 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1260 if (ds->ds_prev) { 1261 uint64_t next_clones_obj = 1262 ds->ds_prev->ds_phys->ds_next_clones_obj; 1263 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1264 ds->ds_object || 1265 ds->ds_prev->ds_phys->ds_num_children > 1); 1266 if 
(ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1267 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1268 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1269 ds->ds_prev->ds_phys->ds_creation_txg); 1270 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1271 } else if (next_clones_obj != 0) { 1272 dsl_dataset_remove_from_next_clones(ds->ds_prev, 1273 dsphys->ds_next_snap_obj, tx); 1274 VERIFY0(zap_add_int(mos, 1275 next_clones_obj, dsobj, tx)); 1276 } 1277 } 1278 1279 /* 1280 * If we have a reference-reservation on this dataset, we will 1281 * need to increase the amount of refreservation being charged 1282 * since our unique space is going to zero. 1283 */ 1284 if (ds->ds_reserved) { 1285 int64_t delta; 1286 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 1287 delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 1288 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1289 delta, 0, 0, tx); 1290 } 1291 1292 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1293 ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, 1294 UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); 1295 dsl_deadlist_close(&ds->ds_deadlist); 1296 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 1297 dsl_deadlist_add_key(&ds->ds_deadlist, 1298 ds->ds_phys->ds_prev_snap_txg, tx); 1299 1300 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1301 ds->ds_phys->ds_prev_snap_obj = dsobj; 1302 ds->ds_phys->ds_prev_snap_txg = crtxg; 1303 ds->ds_phys->ds_unique_bytes = 0; 1304 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1305 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1306 1307 VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1308 snapname, 8, 1, &dsobj, tx)); 1309 1310 if (ds->ds_prev) 1311 dsl_dataset_rele(ds->ds_prev, ds); 1312 VERIFY0(dsl_dataset_hold_obj(dp, 1313 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 1314 1315 dsl_scan_ds_snapshotted(ds, tx); 1316 1317 dsl_dir_snap_cmtime_update(ds->ds_dir); 1318 1319 
spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); 1320} 1321 1322static void 1323dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) 1324{ 1325 dsl_dataset_snapshot_arg_t *ddsa = arg; 1326 dsl_pool_t *dp = dmu_tx_pool(tx); 1327 nvpair_t *pair; 1328 1329 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1330 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1331 dsl_dataset_t *ds; 1332 char *name, *atp; 1333 char dsname[MAXNAMELEN]; 1334 1335 name = nvpair_name(pair); 1336 atp = strchr(name, '@'); 1337 (void) strlcpy(dsname, name, atp - name + 1); 1338 VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); 1339 1340 dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); 1341 if (ddsa->ddsa_props != NULL) { 1342 dsl_props_set_sync_impl(ds->ds_prev, 1343 ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); 1344 } 1345 dsl_dataset_rele(ds, FTAG); 1346 } 1347} 1348 1349/* 1350 * The snapshots must all be in the same pool. 1351 * All-or-nothing: if there are any failures, nothing will be modified. 
1352 */ 1353int 1354dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) 1355{ 1356 dsl_dataset_snapshot_arg_t ddsa; 1357 nvpair_t *pair; 1358 boolean_t needsuspend; 1359 int error; 1360 spa_t *spa; 1361 char *firstname; 1362 nvlist_t *suspended = NULL; 1363 1364 pair = nvlist_next_nvpair(snaps, NULL); 1365 if (pair == NULL) 1366 return (0); 1367 firstname = nvpair_name(pair); 1368 1369 error = spa_open(firstname, &spa, FTAG); 1370 if (error != 0) 1371 return (error); 1372 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1373 spa_close(spa, FTAG); 1374 1375 if (needsuspend) { 1376 suspended = fnvlist_alloc(); 1377 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1378 pair = nvlist_next_nvpair(snaps, pair)) { 1379 char fsname[MAXNAMELEN]; 1380 char *snapname = nvpair_name(pair); 1381 char *atp; 1382 void *cookie; 1383 1384 atp = strchr(snapname, '@'); 1385 if (atp == NULL) { 1386 error = SET_ERROR(EINVAL); 1387 break; 1388 } 1389 (void) strlcpy(fsname, snapname, atp - snapname + 1); 1390 1391 error = zil_suspend(fsname, &cookie); 1392 if (error != 0) 1393 break; 1394 fnvlist_add_uint64(suspended, fsname, 1395 (uintptr_t)cookie); 1396 } 1397 } 1398 1399 ddsa.ddsa_snaps = snaps; 1400 ddsa.ddsa_props = props; 1401 ddsa.ddsa_errors = errors; 1402 ddsa.ddsa_cr = CRED(); 1403 1404 if (error == 0) { 1405 error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, 1406 dsl_dataset_snapshot_sync, &ddsa, 1407 fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL); 1408 } 1409 1410 if (suspended != NULL) { 1411 for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; 1412 pair = nvlist_next_nvpair(suspended, pair)) { 1413 zil_resume((void *)(uintptr_t) 1414 fnvpair_value_uint64(pair)); 1415 } 1416 fnvlist_free(suspended); 1417 } 1418 1419#ifdef __FreeBSD__ 1420#ifdef _KERNEL 1421 if (error == 0) { 1422 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1423 pair = nvlist_next_nvpair(snaps, pair)) { 1424 char *snapname = 
nvpair_name(pair); 1425 zvol_create_minors(snapname); 1426 } 1427 } 1428#endif 1429#endif 1430 return (error); 1431} 1432 1433typedef struct dsl_dataset_snapshot_tmp_arg { 1434 const char *ddsta_fsname; 1435 const char *ddsta_snapname; 1436 minor_t ddsta_cleanup_minor; 1437 const char *ddsta_htag; 1438} dsl_dataset_snapshot_tmp_arg_t; 1439 1440static int 1441dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) 1442{ 1443 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1444 dsl_pool_t *dp = dmu_tx_pool(tx); 1445 dsl_dataset_t *ds; 1446 int error; 1447 1448 error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); 1449 if (error != 0) 1450 return (error); 1451 1452 /* NULL cred means no limit check for tmp snapshot */ 1453 error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, 1454 tx, B_FALSE, 0, NULL); 1455 if (error != 0) { 1456 dsl_dataset_rele(ds, FTAG); 1457 return (error); 1458 } 1459 1460 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1461 dsl_dataset_rele(ds, FTAG); 1462 return (SET_ERROR(ENOTSUP)); 1463 } 1464 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1465 B_TRUE, tx); 1466 if (error != 0) { 1467 dsl_dataset_rele(ds, FTAG); 1468 return (error); 1469 } 1470 1471 dsl_dataset_rele(ds, FTAG); 1472 return (0); 1473} 1474 1475static void 1476dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1477{ 1478 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1479 dsl_pool_t *dp = dmu_tx_pool(tx); 1480 dsl_dataset_t *ds; 1481 1482 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1483 1484 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1485 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1486 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1487 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1488 1489 dsl_dataset_rele(ds, FTAG); 1490} 1491 1492int 1493dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1494 minor_t cleanup_minor, const char *htag) 1495{ 
1496 dsl_dataset_snapshot_tmp_arg_t ddsta; 1497 int error; 1498 spa_t *spa; 1499 boolean_t needsuspend; 1500 void *cookie; 1501 1502 ddsta.ddsta_fsname = fsname; 1503 ddsta.ddsta_snapname = snapname; 1504 ddsta.ddsta_cleanup_minor = cleanup_minor; 1505 ddsta.ddsta_htag = htag; 1506 1507 error = spa_open(fsname, &spa, FTAG); 1508 if (error != 0) 1509 return (error); 1510 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1511 spa_close(spa, FTAG); 1512 1513 if (needsuspend) { 1514 error = zil_suspend(fsname, &cookie); 1515 if (error != 0) 1516 return (error); 1517 } 1518 1519 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1520 dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED); 1521 1522 if (needsuspend) 1523 zil_resume(cookie); 1524 return (error); 1525} 1526 1527 1528void 1529dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1530{ 1531 ASSERT(dmu_tx_is_syncing(tx)); 1532 ASSERT(ds->ds_objset != NULL); 1533 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1534 1535 /* 1536 * in case we had to change ds_fsid_guid when we opened it, 1537 * sync it out now. 1538 */ 1539 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1540 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1541 1542 dmu_objset_sync(ds->ds_objset, zio, tx); 1543} 1544 1545static void 1546get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1547{ 1548 uint64_t count = 0; 1549 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1550 zap_cursor_t zc; 1551 zap_attribute_t za; 1552 nvlist_t *propval = fnvlist_alloc(); 1553 nvlist_t *val = fnvlist_alloc(); 1554 1555 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1556 1557 /* 1558 * There may be missing entries in ds_next_clones_obj 1559 * due to a bug in a previous version of the code. 1560 * Only trust it if it has the right number of entries. 
1561 */ 1562 if (ds->ds_phys->ds_next_clones_obj != 0) { 1563 VERIFY0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1564 &count)); 1565 } 1566 if (count != ds->ds_phys->ds_num_children - 1) 1567 goto fail; 1568 for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); 1569 zap_cursor_retrieve(&zc, &za) == 0; 1570 zap_cursor_advance(&zc)) { 1571 dsl_dataset_t *clone; 1572 char buf[ZFS_MAXNAMELEN]; 1573 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1574 za.za_first_integer, FTAG, &clone)); 1575 dsl_dir_name(clone->ds_dir, buf); 1576 fnvlist_add_boolean(val, buf); 1577 dsl_dataset_rele(clone, FTAG); 1578 } 1579 zap_cursor_fini(&zc); 1580 fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1581 fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1582fail: 1583 nvlist_free(val); 1584 nvlist_free(propval); 1585} 1586 1587void 1588dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1589{ 1590 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1591 uint64_t refd, avail, uobjs, aobjs, ratio; 1592 1593 ASSERT(dsl_pool_config_held(dp)); 1594 1595 ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 
100 : 1596 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1597 ds->ds_phys->ds_compressed_bytes); 1598 1599 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1600 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1601 ds->ds_phys->ds_uncompressed_bytes); 1602 1603 if (dsl_dataset_is_snapshot(ds)) { 1604 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1605 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1606 ds->ds_phys->ds_unique_bytes); 1607 get_clones_stat(ds, nv); 1608 } else { 1609 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { 1610 char buf[MAXNAMELEN]; 1611 dsl_dataset_name(ds->ds_prev, buf); 1612 dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); 1613 } 1614 1615 dsl_dir_stats(ds->ds_dir, nv); 1616 } 1617 1618 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1619 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1620 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1621 1622 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1623 ds->ds_phys->ds_creation_time); 1624 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1625 ds->ds_phys->ds_creation_txg); 1626 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1627 ds->ds_quota); 1628 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1629 ds->ds_reserved); 1630 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1631 ds->ds_phys->ds_guid); 1632 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1633 ds->ds_phys->ds_unique_bytes); 1634 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1635 ds->ds_object); 1636 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1637 ds->ds_userrefs); 1638 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1639 DS_IS_DEFER_DESTROY(ds) ? 
1 : 0); 1640 1641 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1642 uint64_t written, comp, uncomp; 1643 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1644 dsl_dataset_t *prev; 1645 1646 int err = dsl_dataset_hold_obj(dp, 1647 ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); 1648 if (err == 0) { 1649 err = dsl_dataset_space_written(prev, ds, &written, 1650 &comp, &uncomp); 1651 dsl_dataset_rele(prev, FTAG); 1652 if (err == 0) { 1653 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1654 written); 1655 } 1656 } 1657 } 1658} 1659 1660void 1661dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1662{ 1663 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1664 ASSERT(dsl_pool_config_held(dp)); 1665 1666 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1667 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1668 stat->dds_guid = ds->ds_phys->ds_guid; 1669 stat->dds_origin[0] = '\0'; 1670 if (dsl_dataset_is_snapshot(ds)) { 1671 stat->dds_is_snapshot = B_TRUE; 1672 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1673 } else { 1674 stat->dds_is_snapshot = B_FALSE; 1675 stat->dds_num_clones = 0; 1676 1677 if (dsl_dir_is_clone(ds->ds_dir)) { 1678 dsl_dataset_t *ods; 1679 1680 VERIFY0(dsl_dataset_hold_obj(dp, 1681 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 1682 dsl_dataset_name(ods, stat->dds_origin); 1683 dsl_dataset_rele(ods, FTAG); 1684 } 1685 } 1686} 1687 1688uint64_t 1689dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1690{ 1691 return (ds->ds_fsid_guid); 1692} 1693 1694void 1695dsl_dataset_space(dsl_dataset_t *ds, 1696 uint64_t *refdbytesp, uint64_t *availbytesp, 1697 uint64_t *usedobjsp, uint64_t *availobjsp) 1698{ 1699 *refdbytesp = ds->ds_phys->ds_referenced_bytes; 1700 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1701 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1702 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1703 if (ds->ds_quota != 0) { 1704 /* 1705 * Adjust available bytes according to 
refquota 1706 */ 1707 if (*refdbytesp < ds->ds_quota) 1708 *availbytesp = MIN(*availbytesp, 1709 ds->ds_quota - *refdbytesp); 1710 else 1711 *availbytesp = 0; 1712 } 1713 *usedobjsp = BP_GET_FILL(&ds->ds_phys->ds_bp); 1714 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1715} 1716 1717boolean_t 1718dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) 1719{ 1720 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1721 1722 ASSERT(dsl_pool_config_held(dp)); 1723 if (snap == NULL) 1724 return (B_FALSE); 1725 if (ds->ds_phys->ds_bp.blk_birth > 1726 snap->ds_phys->ds_creation_txg) { 1727 objset_t *os, *os_snap; 1728 /* 1729 * It may be that only the ZIL differs, because it was 1730 * reset in the head. Don't count that as being 1731 * modified. 1732 */ 1733 if (dmu_objset_from_ds(ds, &os) != 0) 1734 return (B_TRUE); 1735 if (dmu_objset_from_ds(snap, &os_snap) != 0) 1736 return (B_TRUE); 1737 return (bcmp(&os->os_phys->os_meta_dnode, 1738 &os_snap->os_phys->os_meta_dnode, 1739 sizeof (os->os_phys->os_meta_dnode)) != 0); 1740 } 1741 return (B_FALSE); 1742} 1743 1744typedef struct dsl_dataset_rename_snapshot_arg { 1745 const char *ddrsa_fsname; 1746 const char *ddrsa_oldsnapname; 1747 const char *ddrsa_newsnapname; 1748 boolean_t ddrsa_recursive; 1749 dmu_tx_t *ddrsa_tx; 1750} dsl_dataset_rename_snapshot_arg_t; 1751 1752/* ARGSUSED */ 1753static int 1754dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 1755 dsl_dataset_t *hds, void *arg) 1756{ 1757 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1758 int error; 1759 uint64_t val; 1760 1761 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1762 if (error != 0) { 1763 /* ignore nonexistent snapshots */ 1764 return (error == ENOENT ? 
0 : error); 1765 } 1766 1767 /* new name should not exist */ 1768 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 1769 if (error == 0) 1770 error = SET_ERROR(EEXIST); 1771 else if (error == ENOENT) 1772 error = 0; 1773 1774 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1775 if (dsl_dir_namelen(hds->ds_dir) + 1 + 1776 strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) 1777 error = SET_ERROR(ENAMETOOLONG); 1778 1779 return (error); 1780} 1781 1782static int 1783dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 1784{ 1785 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1786 dsl_pool_t *dp = dmu_tx_pool(tx); 1787 dsl_dataset_t *hds; 1788 int error; 1789 1790 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 1791 if (error != 0) 1792 return (error); 1793 1794 if (ddrsa->ddrsa_recursive) { 1795 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1796 dsl_dataset_rename_snapshot_check_impl, ddrsa, 1797 DS_FIND_CHILDREN); 1798 } else { 1799 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 1800 } 1801 dsl_dataset_rele(hds, FTAG); 1802 return (error); 1803} 1804 1805static int 1806dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 1807 dsl_dataset_t *hds, void *arg) 1808{ 1809#ifdef __FreeBSD__ 1810#ifdef _KERNEL 1811 char *oldname, *newname; 1812#endif 1813#endif 1814 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1815 dsl_dataset_t *ds; 1816 uint64_t val; 1817 dmu_tx_t *tx = ddrsa->ddrsa_tx; 1818 int error; 1819 1820 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1821 ASSERT(error == 0 || error == ENOENT); 1822 if (error == ENOENT) { 1823 /* ignore nonexistent snapshots */ 1824 return (0); 1825 } 1826 1827 VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 1828 1829 /* log before we change the name */ 1830 spa_history_log_internal_ds(ds, "rename", tx, 1831 "-> @%s", ddrsa->ddrsa_newsnapname); 1832 1833 VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, 
tx, 1834 B_FALSE)); 1835 mutex_enter(&ds->ds_lock); 1836 (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 1837 mutex_exit(&ds->ds_lock); 1838 VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, 1839 ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 1840 1841#ifdef __FreeBSD__ 1842#ifdef _KERNEL 1843 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1844 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1845 snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1846 ddrsa->ddrsa_oldsnapname); 1847 snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1848 ddrsa->ddrsa_newsnapname); 1849 zfsvfs_update_fromname(oldname, newname); 1850 zvol_rename_minors(oldname, newname); 1851 kmem_free(newname, MAXPATHLEN); 1852 kmem_free(oldname, MAXPATHLEN); 1853#endif 1854#endif 1855 dsl_dataset_rele(ds, FTAG); 1856 1857 return (0); 1858} 1859 1860static void 1861dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 1862{ 1863 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1864 dsl_pool_t *dp = dmu_tx_pool(tx); 1865 dsl_dataset_t *hds; 1866 1867 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 1868 ddrsa->ddrsa_tx = tx; 1869 if (ddrsa->ddrsa_recursive) { 1870 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1871 dsl_dataset_rename_snapshot_sync_impl, ddrsa, 1872 DS_FIND_CHILDREN)); 1873 } else { 1874 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 1875 } 1876 dsl_dataset_rele(hds, FTAG); 1877} 1878 1879int 1880dsl_dataset_rename_snapshot(const char *fsname, 1881 const char *oldsnapname, const char *newsnapname, boolean_t recursive) 1882{ 1883 dsl_dataset_rename_snapshot_arg_t ddrsa; 1884 1885 ddrsa.ddrsa_fsname = fsname; 1886 ddrsa.ddrsa_oldsnapname = oldsnapname; 1887 ddrsa.ddrsa_newsnapname = newsnapname; 1888 ddrsa.ddrsa_recursive = recursive; 1889 1890 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 1891 dsl_dataset_rename_snapshot_sync, &ddrsa, 1892 1, ZFS_SPACE_CHECK_RESERVED)); 1893} 1894 
1895/* 1896 * If we're doing an ownership handoff, we need to make sure that there is 1897 * only one long hold on the dataset. We're not allowed to change anything here 1898 * so we don't permanently release the long hold or regular hold here. We want 1899 * to do this only when syncing to avoid the dataset unexpectedly going away 1900 * when we release the long hold. 1901 */ 1902static int 1903dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1904{ 1905 boolean_t held; 1906 1907 if (!dmu_tx_is_syncing(tx)) 1908 return (0); 1909 1910 if (owner != NULL) { 1911 VERIFY3P(ds->ds_owner, ==, owner); 1912 dsl_dataset_long_rele(ds, owner); 1913 } 1914 1915 held = dsl_dataset_long_held(ds); 1916 1917 if (owner != NULL) 1918 dsl_dataset_long_hold(ds, owner); 1919 1920 if (held) 1921 return (SET_ERROR(EBUSY)); 1922 1923 return (0); 1924} 1925 1926typedef struct dsl_dataset_rollback_arg { 1927 const char *ddra_fsname; 1928 void *ddra_owner; 1929 nvlist_t *ddra_result; 1930} dsl_dataset_rollback_arg_t; 1931 1932static int 1933dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 1934{ 1935 dsl_dataset_rollback_arg_t *ddra = arg; 1936 dsl_pool_t *dp = dmu_tx_pool(tx); 1937 dsl_dataset_t *ds; 1938 int64_t unused_refres_delta; 1939 int error; 1940 1941 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 1942 if (error != 0) 1943 return (error); 1944 1945 /* must not be a snapshot */ 1946 if (dsl_dataset_is_snapshot(ds)) { 1947 dsl_dataset_rele(ds, FTAG); 1948 return (SET_ERROR(EINVAL)); 1949 } 1950 1951 /* must have a most recent snapshot */ 1952 if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { 1953 dsl_dataset_rele(ds, FTAG); 1954 return (SET_ERROR(EINVAL)); 1955 } 1956 1957 /* must not have any bookmarks after the most recent snapshot */ 1958 nvlist_t *proprequest = fnvlist_alloc(); 1959 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); 1960 nvlist_t *bookmarks = fnvlist_alloc(); 1961 error = dsl_get_bookmarks_impl(ds, 
proprequest, bookmarks); 1962 fnvlist_free(proprequest); 1963 if (error != 0) 1964 return (error); 1965 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL); 1966 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { 1967 nvlist_t *valuenv = 1968 fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), 1969 zfs_prop_to_name(ZFS_PROP_CREATETXG)); 1970 uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); 1971 if (createtxg > ds->ds_phys->ds_prev_snap_txg) { 1972 fnvlist_free(bookmarks); 1973 dsl_dataset_rele(ds, FTAG); 1974 return (SET_ERROR(EEXIST)); 1975 } 1976 } 1977 fnvlist_free(bookmarks); 1978 1979 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 1980 if (error != 0) { 1981 dsl_dataset_rele(ds, FTAG); 1982 return (error); 1983 } 1984 1985 /* 1986 * Check if the snap we are rolling back to uses more than 1987 * the refquota. 1988 */ 1989 if (ds->ds_quota != 0 && 1990 ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { 1991 dsl_dataset_rele(ds, FTAG); 1992 return (SET_ERROR(EDQUOT)); 1993 } 1994 1995 /* 1996 * When we do the clone swap, we will temporarily use more space 1997 * due to the refreservation (the head will no longer have any 1998 * unique space, so the entire amount of the refreservation will need 1999 * to be free). We will immediately destroy the clone, freeing 2000 * this space, but the freeing happens over many txg's. 
2001 */ 2002 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 2003 ds->ds_phys->ds_unique_bytes); 2004 2005 if (unused_refres_delta > 0 && 2006 unused_refres_delta > 2007 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 2008 dsl_dataset_rele(ds, FTAG); 2009 return (SET_ERROR(ENOSPC)); 2010 } 2011 2012 dsl_dataset_rele(ds, FTAG); 2013 return (0); 2014} 2015 2016static void 2017dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 2018{ 2019 dsl_dataset_rollback_arg_t *ddra = arg; 2020 dsl_pool_t *dp = dmu_tx_pool(tx); 2021 dsl_dataset_t *ds, *clone; 2022 uint64_t cloneobj; 2023 char namebuf[ZFS_MAXNAMELEN]; 2024 2025 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 2026 2027 dsl_dataset_name(ds->ds_prev, namebuf); 2028 fnvlist_add_string(ddra->ddra_result, "target", namebuf); 2029 2030 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 2031 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 2032 2033 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 2034 2035 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 2036 dsl_dataset_zero_zil(ds, tx); 2037 2038 dsl_destroy_head_sync_impl(clone, tx); 2039 2040 dsl_dataset_rele(clone, FTAG); 2041 dsl_dataset_rele(ds, FTAG); 2042} 2043 2044/* 2045 * Rolls back the given filesystem or volume to the most recent snapshot. 2046 * The name of the most recent snapshot will be returned under key "target" 2047 * in the result nvlist. 2048 * 2049 * If owner != NULL: 2050 * - The existing dataset MUST be owned by the specified owner at entry 2051 * - Upon return, dataset will still be held by the same owner, whether we 2052 * succeed or not. 2053 * 2054 * This mode is required any time the existing filesystem is mounted. See 2055 * notes above zfs_suspend_fs() for further details. 
2056 */ 2057int 2058dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result) 2059{ 2060 dsl_dataset_rollback_arg_t ddra; 2061 2062 ddra.ddra_fsname = fsname; 2063 ddra.ddra_owner = owner; 2064 ddra.ddra_result = result; 2065 2066 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 2067 dsl_dataset_rollback_sync, &ddra, 2068 1, ZFS_SPACE_CHECK_RESERVED)); 2069} 2070 2071struct promotenode { 2072 list_node_t link; 2073 dsl_dataset_t *ds; 2074}; 2075 2076typedef struct dsl_dataset_promote_arg { 2077 const char *ddpa_clonename; 2078 dsl_dataset_t *ddpa_clone; 2079 list_t shared_snaps, origin_snaps, clone_snaps; 2080 dsl_dataset_t *origin_origin; /* origin of the origin */ 2081 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2082 char *err_ds; 2083 cred_t *cr; 2084} dsl_dataset_promote_arg_t; 2085 2086static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2087static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 2088 void *tag); 2089static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 2090 2091static int 2092dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 2093{ 2094 dsl_dataset_promote_arg_t *ddpa = arg; 2095 dsl_pool_t *dp = dmu_tx_pool(tx); 2096 dsl_dataset_t *hds; 2097 struct promotenode *snap; 2098 dsl_dataset_t *origin_ds; 2099 int err; 2100 uint64_t unused; 2101 uint64_t ss_mv_cnt; 2102 2103 err = promote_hold(ddpa, dp, FTAG); 2104 if (err != 0) 2105 return (err); 2106 2107 hds = ddpa->ddpa_clone; 2108 2109 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 2110 promote_rele(ddpa, FTAG); 2111 return (SET_ERROR(EXDEV)); 2112 } 2113 2114 /* 2115 * Compute and check the amount of space to transfer. Since this is 2116 * so expensive, don't do the preliminary check. 
2117 */ 2118 if (!dmu_tx_is_syncing(tx)) { 2119 promote_rele(ddpa, FTAG); 2120 return (0); 2121 } 2122 2123 snap = list_head(&ddpa->shared_snaps); 2124 origin_ds = snap->ds; 2125 2126 /* compute origin's new unique space */ 2127 snap = list_tail(&ddpa->clone_snaps); 2128 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2129 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2130 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2131 &ddpa->unique, &unused, &unused); 2132 2133 /* 2134 * Walk the snapshots that we are moving 2135 * 2136 * Compute space to transfer. Consider the incremental changes 2137 * to used by each snapshot: 2138 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2139 * So each snapshot gave birth to: 2140 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2141 * So a sequence would look like: 2142 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2143 * Which simplifies to: 2144 * uN + kN + kN-1 + ... + k1 + k0 2145 * Note however, if we stop before we reach the ORIGIN we get: 2146 * uN + kN + kN-1 + ... + kM - uM-1 2147 */ 2148 ss_mv_cnt = 0; 2149 ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; 2150 ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2151 ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2152 for (snap = list_head(&ddpa->shared_snaps); snap; 2153 snap = list_next(&ddpa->shared_snaps, snap)) { 2154 uint64_t val, dlused, dlcomp, dluncomp; 2155 dsl_dataset_t *ds = snap->ds; 2156 2157 ss_mv_cnt++; 2158 2159 /* 2160 * If there are long holds, we won't be able to evict 2161 * the objset. 
2162 */ 2163 if (dsl_dataset_long_held(ds)) { 2164 err = SET_ERROR(EBUSY); 2165 goto out; 2166 } 2167 2168 /* Check that the snapshot name does not conflict */ 2169 VERIFY0(dsl_dataset_get_snapname(ds)); 2170 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2171 if (err == 0) { 2172 (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 2173 err = SET_ERROR(EEXIST); 2174 goto out; 2175 } 2176 if (err != ENOENT) 2177 goto out; 2178 2179 /* The very first snapshot does not have a deadlist */ 2180 if (ds->ds_phys->ds_prev_snap_obj == 0) 2181 continue; 2182 2183 dsl_deadlist_space(&ds->ds_deadlist, 2184 &dlused, &dlcomp, &dluncomp); 2185 ddpa->used += dlused; 2186 ddpa->comp += dlcomp; 2187 ddpa->uncomp += dluncomp; 2188 } 2189 2190 /* 2191 * If we are a clone of a clone then we never reached ORIGIN, 2192 * so we need to subtract out the clone origin's used space. 2193 */ 2194 if (ddpa->origin_origin) { 2195 ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; 2196 ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; 2197 ddpa->uncomp -= 2198 ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; 2199 } 2200 2201 /* Check that there is enough space and limit headroom here */ 2202 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2203 0, ss_mv_cnt, ddpa->used, ddpa->cr); 2204 if (err != 0) 2205 goto out; 2206 2207 /* 2208 * Compute the amounts of space that will be used by snapshots 2209 * after the promotion (for both origin and clone). For each, 2210 * it is the amount of space that will be on all of their 2211 * deadlists (that was not born before their new origin). 2212 */ 2213 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2214 uint64_t space; 2215 2216 /* 2217 * Note, typically this will not be a clone of a clone, 2218 * so dd_origin_txg will be < TXG_INITIAL, so 2219 * these snaplist_space() -> dsl_deadlist_space_range() 2220 * calls will be fast because they do not have to 2221 * iterate over all bps. 
2222 */ 2223 snap = list_head(&ddpa->origin_snaps); 2224 err = snaplist_space(&ddpa->shared_snaps, 2225 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2226 if (err != 0) 2227 goto out; 2228 2229 err = snaplist_space(&ddpa->clone_snaps, 2230 snap->ds->ds_dir->dd_origin_txg, &space); 2231 if (err != 0) 2232 goto out; 2233 ddpa->cloneusedsnap += space; 2234 } 2235 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2236 err = snaplist_space(&ddpa->origin_snaps, 2237 origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); 2238 if (err != 0) 2239 goto out; 2240 } 2241 2242out: 2243 promote_rele(ddpa, FTAG); 2244 return (err); 2245} 2246 2247static void 2248dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2249{ 2250 dsl_dataset_promote_arg_t *ddpa = arg; 2251 dsl_pool_t *dp = dmu_tx_pool(tx); 2252 dsl_dataset_t *hds; 2253 struct promotenode *snap; 2254 dsl_dataset_t *origin_ds; 2255 dsl_dataset_t *origin_head; 2256 dsl_dir_t *dd; 2257 dsl_dir_t *odd = NULL; 2258 uint64_t oldnext_obj; 2259 int64_t delta; 2260#if defined(__FreeBSD__) && defined(_KERNEL) 2261 char *oldname, *newname; 2262#endif 2263 2264 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2265 hds = ddpa->ddpa_clone; 2266 2267 ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); 2268 2269 snap = list_head(&ddpa->shared_snaps); 2270 origin_ds = snap->ds; 2271 dd = hds->ds_dir; 2272 2273 snap = list_head(&ddpa->origin_snaps); 2274 origin_head = snap->ds; 2275 2276 /* 2277 * We need to explicitly open odd, since origin_ds's dd will be 2278 * changing. 
2279 */ 2280 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2281 NULL, FTAG, &odd)); 2282 2283 /* change origin's next snap */ 2284 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2285 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2286 snap = list_tail(&ddpa->clone_snaps); 2287 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2288 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2289 2290 /* change the origin's next clone */ 2291 if (origin_ds->ds_phys->ds_next_clones_obj) { 2292 dsl_dataset_remove_from_next_clones(origin_ds, 2293 snap->ds->ds_object, tx); 2294 VERIFY0(zap_add_int(dp->dp_meta_objset, 2295 origin_ds->ds_phys->ds_next_clones_obj, 2296 oldnext_obj, tx)); 2297 } 2298 2299 /* change origin */ 2300 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2301 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2302 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2303 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2304 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2305 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2306 origin_head->ds_dir->dd_origin_txg = 2307 origin_ds->ds_phys->ds_creation_txg; 2308 2309 /* change dd_clone entries */ 2310 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2311 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2312 odd->dd_phys->dd_clones, hds->ds_object, tx)); 2313 VERIFY0(zap_add_int(dp->dp_meta_objset, 2314 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2315 hds->ds_object, tx)); 2316 2317 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2318 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2319 origin_head->ds_object, tx)); 2320 if (dd->dd_phys->dd_clones == 0) { 2321 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2322 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2323 } 2324 VERIFY0(zap_add_int(dp->dp_meta_objset, 2325 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2326 } 2327 2328#if defined(__FreeBSD__) && defined(_KERNEL) 2329 /* Take the 
spa_namespace_lock early so zvol renames don't deadlock. */ 2330 mutex_enter(&spa_namespace_lock); 2331 2332 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2333 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2334#endif 2335 2336 /* move snapshots to this dir */ 2337 for (snap = list_head(&ddpa->shared_snaps); snap; 2338 snap = list_next(&ddpa->shared_snaps, snap)) { 2339 dsl_dataset_t *ds = snap->ds; 2340 2341 /* 2342 * Property callbacks are registered to a particular 2343 * dsl_dir. Since ours is changing, evict the objset 2344 * so that they will be unregistered from the old dsl_dir. 2345 */ 2346 if (ds->ds_objset) { 2347 dmu_objset_evict(ds->ds_objset); 2348 ds->ds_objset = NULL; 2349 } 2350 2351 /* move snap name entry */ 2352 VERIFY0(dsl_dataset_get_snapname(ds)); 2353 VERIFY0(dsl_dataset_snap_remove(origin_head, 2354 ds->ds_snapname, tx, B_TRUE)); 2355 VERIFY0(zap_add(dp->dp_meta_objset, 2356 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2357 8, 1, &ds->ds_object, tx)); 2358 dsl_fs_ss_count_adjust(hds->ds_dir, 1, 2359 DD_FIELD_SNAPSHOT_COUNT, tx); 2360 2361 /* change containing dsl_dir */ 2362 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2363 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2364 ds->ds_phys->ds_dir_obj = dd->dd_object; 2365 ASSERT3P(ds->ds_dir, ==, odd); 2366 dsl_dir_rele(ds->ds_dir, ds); 2367 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2368 NULL, ds, &ds->ds_dir)); 2369 2370#if defined(__FreeBSD__) && defined(_KERNEL) 2371 dsl_dataset_name(ds, newname); 2372 zfsvfs_update_fromname(oldname, newname); 2373 zvol_rename_minors(oldname, newname); 2374#endif 2375 2376 /* move any clone references */ 2377 if (ds->ds_phys->ds_next_clones_obj && 2378 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2379 zap_cursor_t zc; 2380 zap_attribute_t za; 2381 2382 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2383 ds->ds_phys->ds_next_clones_obj); 2384 zap_cursor_retrieve(&zc, &za) == 0; 2385 zap_cursor_advance(&zc)) { 2386 dsl_dataset_t *cnds; 2387 
uint64_t o; 2388 2389 if (za.za_first_integer == oldnext_obj) { 2390 /* 2391 * We've already moved the 2392 * origin's reference. 2393 */ 2394 continue; 2395 } 2396 2397 VERIFY0(dsl_dataset_hold_obj(dp, 2398 za.za_first_integer, FTAG, &cnds)); 2399 o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 2400 2401 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2402 odd->dd_phys->dd_clones, o, tx)); 2403 VERIFY0(zap_add_int(dp->dp_meta_objset, 2404 dd->dd_phys->dd_clones, o, tx)); 2405 dsl_dataset_rele(cnds, FTAG); 2406 } 2407 zap_cursor_fini(&zc); 2408 } 2409 2410 ASSERT(!dsl_prop_hascb(ds)); 2411 } 2412 2413#if defined(__FreeBSD__) && defined(_KERNEL) 2414 mutex_exit(&spa_namespace_lock); 2415 2416 kmem_free(newname, MAXPATHLEN); 2417 kmem_free(oldname, MAXPATHLEN); 2418#endif 2419 /* 2420 * Change space accounting. 2421 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2422 * both be valid, or both be 0 (resulting in delta == 0). This 2423 * is true for each of {clone,origin} independently. 
2424 */ 2425 2426 delta = ddpa->cloneusedsnap - 2427 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2428 ASSERT3S(delta, >=, 0); 2429 ASSERT3U(ddpa->used, >=, delta); 2430 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2431 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2432 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2433 2434 delta = ddpa->originusedsnap - 2435 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2436 ASSERT3S(delta, <=, 0); 2437 ASSERT3U(ddpa->used, >=, -delta); 2438 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2439 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2440 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2441 2442 origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; 2443 2444 /* log history record */ 2445 spa_history_log_internal_ds(hds, "promote", tx, ""); 2446 2447 dsl_dir_rele(odd, FTAG); 2448 promote_rele(ddpa, FTAG); 2449} 2450 2451/* 2452 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2453 * (exclusive) and last_obj (inclusive). The list will be in reverse 2454 * order (last_obj will be the list_head()). If first_obj == 0, do all 2455 * snapshots back to this dataset's origin. 
2456 */ 2457static int 2458snaplist_make(dsl_pool_t *dp, 2459 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2460{ 2461 uint64_t obj = last_obj; 2462 2463 list_create(l, sizeof (struct promotenode), 2464 offsetof(struct promotenode, link)); 2465 2466 while (obj != first_obj) { 2467 dsl_dataset_t *ds; 2468 struct promotenode *snap; 2469 int err; 2470 2471 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2472 ASSERT(err != ENOENT); 2473 if (err != 0) 2474 return (err); 2475 2476 if (first_obj == 0) 2477 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2478 2479 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2480 snap->ds = ds; 2481 list_insert_tail(l, snap); 2482 obj = ds->ds_phys->ds_prev_snap_obj; 2483 } 2484 2485 return (0); 2486} 2487 2488static int 2489snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2490{ 2491 struct promotenode *snap; 2492 2493 *spacep = 0; 2494 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2495 uint64_t used, comp, uncomp; 2496 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2497 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2498 *spacep += used; 2499 } 2500 return (0); 2501} 2502 2503static void 2504snaplist_destroy(list_t *l, void *tag) 2505{ 2506 struct promotenode *snap; 2507 2508 if (l == NULL || !list_link_active(&l->list_head)) 2509 return; 2510 2511 while ((snap = list_tail(l)) != NULL) { 2512 list_remove(l, snap); 2513 dsl_dataset_rele(snap->ds, tag); 2514 kmem_free(snap, sizeof (*snap)); 2515 } 2516 list_destroy(l); 2517} 2518 2519static int 2520promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2521{ 2522 int error; 2523 dsl_dir_t *dd; 2524 struct promotenode *snap; 2525 2526 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2527 &ddpa->ddpa_clone); 2528 if (error != 0) 2529 return (error); 2530 dd = ddpa->ddpa_clone->ds_dir; 2531 2532 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2533 !dsl_dir_is_clone(dd)) { 2534 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2535 return 
(SET_ERROR(EINVAL)); 2536 } 2537 2538 error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, 2539 &ddpa->shared_snaps, tag); 2540 if (error != 0) 2541 goto out; 2542 2543 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2544 &ddpa->clone_snaps, tag); 2545 if (error != 0) 2546 goto out; 2547 2548 snap = list_head(&ddpa->shared_snaps); 2549 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2550 error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, 2551 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, 2552 &ddpa->origin_snaps, tag); 2553 if (error != 0) 2554 goto out; 2555 2556 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2557 error = dsl_dataset_hold_obj(dp, 2558 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2559 tag, &ddpa->origin_origin); 2560 if (error != 0) 2561 goto out; 2562 } 2563out: 2564 if (error != 0) 2565 promote_rele(ddpa, tag); 2566 return (error); 2567} 2568 2569static void 2570promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2571{ 2572 snaplist_destroy(&ddpa->shared_snaps, tag); 2573 snaplist_destroy(&ddpa->clone_snaps, tag); 2574 snaplist_destroy(&ddpa->origin_snaps, tag); 2575 if (ddpa->origin_origin != NULL) 2576 dsl_dataset_rele(ddpa->origin_origin, tag); 2577 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2578} 2579 2580/* 2581 * Promote a clone. 2582 * 2583 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2584 * in with the name. (It must be at least MAXNAMELEN bytes long.) 2585 */ 2586int 2587dsl_dataset_promote(const char *name, char *conflsnap) 2588{ 2589 dsl_dataset_promote_arg_t ddpa = { 0 }; 2590 uint64_t numsnaps; 2591 int error; 2592 objset_t *os; 2593 2594 /* 2595 * We will modify space proportional to the number of 2596 * snapshots. Compute numsnaps. 
2597 */ 2598 error = dmu_objset_hold(name, FTAG, &os); 2599 if (error != 0) 2600 return (error); 2601 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2602 dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); 2603 dmu_objset_rele(os, FTAG); 2604 if (error != 0) 2605 return (error); 2606 2607 ddpa.ddpa_clonename = name; 2608 ddpa.err_ds = conflsnap; 2609 ddpa.cr = CRED(); 2610 2611 return (dsl_sync_task(name, dsl_dataset_promote_check, 2612 dsl_dataset_promote_sync, &ddpa, 2613 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED)); 2614} 2615 2616int 2617dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2618 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2619{ 2620 int64_t unused_refres_delta; 2621 2622 /* they should both be heads */ 2623 if (dsl_dataset_is_snapshot(clone) || 2624 dsl_dataset_is_snapshot(origin_head)) 2625 return (SET_ERROR(EINVAL)); 2626 2627 /* if we are not forcing, the branch point should be just before them */ 2628 if (!force && clone->ds_prev != origin_head->ds_prev) 2629 return (SET_ERROR(EINVAL)); 2630 2631 /* clone should be the clone (unless they are unrelated) */ 2632 if (clone->ds_prev != NULL && 2633 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2634 origin_head->ds_dir != clone->ds_prev->ds_dir) 2635 return (SET_ERROR(EINVAL)); 2636 2637 /* the clone should be a child of the origin */ 2638 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2639 return (SET_ERROR(EINVAL)); 2640 2641 /* origin_head shouldn't be modified unless 'force' */ 2642 if (!force && 2643 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 2644 return (SET_ERROR(ETXTBSY)); 2645 2646 /* origin_head should have no long holds (e.g. 
is not mounted) */ 2647 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2648 return (SET_ERROR(EBUSY)); 2649 2650 /* check amount of any unconsumed refreservation */ 2651 unused_refres_delta = 2652 (int64_t)MIN(origin_head->ds_reserved, 2653 origin_head->ds_phys->ds_unique_bytes) - 2654 (int64_t)MIN(origin_head->ds_reserved, 2655 clone->ds_phys->ds_unique_bytes); 2656 2657 if (unused_refres_delta > 0 && 2658 unused_refres_delta > 2659 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2660 return (SET_ERROR(ENOSPC)); 2661 2662 /* clone can't be over the head's refquota */ 2663 if (origin_head->ds_quota != 0 && 2664 clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) 2665 return (SET_ERROR(EDQUOT)); 2666 2667 return (0); 2668} 2669 2670void 2671dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2672 dsl_dataset_t *origin_head, dmu_tx_t *tx) 2673{ 2674 dsl_pool_t *dp = dmu_tx_pool(tx); 2675 int64_t unused_refres_delta; 2676 2677 ASSERT(clone->ds_reserved == 0); 2678 ASSERT(origin_head->ds_quota == 0 || 2679 clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); 2680 ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 2681 2682 dmu_buf_will_dirty(clone->ds_dbuf, tx); 2683 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2684 2685 if (clone->ds_objset != NULL) { 2686 dmu_objset_evict(clone->ds_objset); 2687 clone->ds_objset = NULL; 2688 } 2689 2690 if (origin_head->ds_objset != NULL) { 2691 dmu_objset_evict(origin_head->ds_objset); 2692 origin_head->ds_objset = NULL; 2693 } 2694 2695 unused_refres_delta = 2696 (int64_t)MIN(origin_head->ds_reserved, 2697 origin_head->ds_phys->ds_unique_bytes) - 2698 (int64_t)MIN(origin_head->ds_reserved, 2699 clone->ds_phys->ds_unique_bytes); 2700 2701 /* 2702 * Reset origin's unique bytes, if it exists. 
2703 */ 2704 if (clone->ds_prev) { 2705 dsl_dataset_t *origin = clone->ds_prev; 2706 uint64_t comp, uncomp; 2707 2708 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2709 dsl_deadlist_space_range(&clone->ds_deadlist, 2710 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2711 &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 2712 } 2713 2714 /* swap blkptrs */ 2715 { 2716 blkptr_t tmp; 2717 tmp = origin_head->ds_phys->ds_bp; 2718 origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; 2719 clone->ds_phys->ds_bp = tmp; 2720 } 2721 2722 /* set dd_*_bytes */ 2723 { 2724 int64_t dused, dcomp, duncomp; 2725 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2726 uint64_t odl_used, odl_comp, odl_uncomp; 2727 2728 ASSERT3U(clone->ds_dir->dd_phys-> 2729 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2730 2731 dsl_deadlist_space(&clone->ds_deadlist, 2732 &cdl_used, &cdl_comp, &cdl_uncomp); 2733 dsl_deadlist_space(&origin_head->ds_deadlist, 2734 &odl_used, &odl_comp, &odl_uncomp); 2735 2736 dused = clone->ds_phys->ds_referenced_bytes + cdl_used - 2737 (origin_head->ds_phys->ds_referenced_bytes + odl_used); 2738 dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - 2739 (origin_head->ds_phys->ds_compressed_bytes + odl_comp); 2740 duncomp = clone->ds_phys->ds_uncompressed_bytes + 2741 cdl_uncomp - 2742 (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2743 2744 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2745 dused, dcomp, duncomp, tx); 2746 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2747 -dused, -dcomp, -duncomp, tx); 2748 2749 /* 2750 * The difference in the space used by snapshots is the 2751 * difference in snapshot space due to the head's 2752 * deadlist (since that's the only thing that's 2753 * changing that affects the snapused). 
2754 */ 2755 dsl_deadlist_space_range(&clone->ds_deadlist, 2756 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2757 &cdl_used, &cdl_comp, &cdl_uncomp); 2758 dsl_deadlist_space_range(&origin_head->ds_deadlist, 2759 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2760 &odl_used, &odl_comp, &odl_uncomp); 2761 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2762 DD_USED_HEAD, DD_USED_SNAP, tx); 2763 } 2764 2765 /* swap ds_*_bytes */ 2766 SWITCH64(origin_head->ds_phys->ds_referenced_bytes, 2767 clone->ds_phys->ds_referenced_bytes); 2768 SWITCH64(origin_head->ds_phys->ds_compressed_bytes, 2769 clone->ds_phys->ds_compressed_bytes); 2770 SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, 2771 clone->ds_phys->ds_uncompressed_bytes); 2772 SWITCH64(origin_head->ds_phys->ds_unique_bytes, 2773 clone->ds_phys->ds_unique_bytes); 2774 2775 /* apply any parent delta for change in unconsumed refreservation */ 2776 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2777 unused_refres_delta, 0, 0, tx); 2778 2779 /* 2780 * Swap deadlists. 2781 */ 2782 dsl_deadlist_close(&clone->ds_deadlist); 2783 dsl_deadlist_close(&origin_head->ds_deadlist); 2784 SWITCH64(origin_head->ds_phys->ds_deadlist_obj, 2785 clone->ds_phys->ds_deadlist_obj); 2786 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2787 clone->ds_phys->ds_deadlist_obj); 2788 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2789 origin_head->ds_phys->ds_deadlist_obj); 2790 2791 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2792 2793 spa_history_log_internal_ds(clone, "clone swap", tx, 2794 "parent=%s", origin_head->ds_dir->dd_myname); 2795} 2796 2797/* 2798 * Given a pool name and a dataset object number in that pool, 2799 * return the name of that dataset. 
2800 */ 2801int 2802dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2803{ 2804 dsl_pool_t *dp; 2805 dsl_dataset_t *ds; 2806 int error; 2807 2808 error = dsl_pool_hold(pname, FTAG, &dp); 2809 if (error != 0) 2810 return (error); 2811 2812 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2813 if (error == 0) { 2814 dsl_dataset_name(ds, buf); 2815 dsl_dataset_rele(ds, FTAG); 2816 } 2817 dsl_pool_rele(dp, FTAG); 2818 2819 return (error); 2820} 2821 2822int 2823dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2824 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2825{ 2826 int error = 0; 2827 2828 ASSERT3S(asize, >, 0); 2829 2830 /* 2831 * *ref_rsrv is the portion of asize that will come from any 2832 * unconsumed refreservation space. 2833 */ 2834 *ref_rsrv = 0; 2835 2836 mutex_enter(&ds->ds_lock); 2837 /* 2838 * Make a space adjustment for reserved bytes. 2839 */ 2840 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2841 ASSERT3U(*used, >=, 2842 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2843 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2844 *ref_rsrv = 2845 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2846 } 2847 2848 if (!check_quota || ds->ds_quota == 0) { 2849 mutex_exit(&ds->ds_lock); 2850 return (0); 2851 } 2852 /* 2853 * If they are requesting more space, and our current estimate 2854 * is over quota, they get to try again unless the actual 2855 * on-disk is over quota and there are no pending changes (which 2856 * may free up space for us). 
2857 */ 2858 if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 2859 if (inflight > 0 || 2860 ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 2861 error = SET_ERROR(ERESTART); 2862 else 2863 error = SET_ERROR(EDQUOT); 2864 } 2865 mutex_exit(&ds->ds_lock); 2866 2867 return (error); 2868} 2869 2870typedef struct dsl_dataset_set_qr_arg { 2871 const char *ddsqra_name; 2872 zprop_source_t ddsqra_source; 2873 uint64_t ddsqra_value; 2874} dsl_dataset_set_qr_arg_t; 2875 2876 2877/* ARGSUSED */ 2878static int 2879dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2880{ 2881 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2882 dsl_pool_t *dp = dmu_tx_pool(tx); 2883 dsl_dataset_t *ds; 2884 int error; 2885 uint64_t newval; 2886 2887 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2888 return (SET_ERROR(ENOTSUP)); 2889 2890 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2891 if (error != 0) 2892 return (error); 2893 2894 if (dsl_dataset_is_snapshot(ds)) { 2895 dsl_dataset_rele(ds, FTAG); 2896 return (SET_ERROR(EINVAL)); 2897 } 2898 2899 error = dsl_prop_predict(ds->ds_dir, 2900 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2901 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2902 if (error != 0) { 2903 dsl_dataset_rele(ds, FTAG); 2904 return (error); 2905 } 2906 2907 if (newval == 0) { 2908 dsl_dataset_rele(ds, FTAG); 2909 return (0); 2910 } 2911 2912 if (newval < ds->ds_phys->ds_referenced_bytes || 2913 newval < ds->ds_reserved) { 2914 dsl_dataset_rele(ds, FTAG); 2915 return (SET_ERROR(ENOSPC)); 2916 } 2917 2918 dsl_dataset_rele(ds, FTAG); 2919 return (0); 2920} 2921 2922static void 2923dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 2924{ 2925 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2926 dsl_pool_t *dp = dmu_tx_pool(tx); 2927 dsl_dataset_t *ds; 2928 uint64_t newval; 2929 2930 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2931 2932 dsl_prop_set_sync_impl(ds, 2933 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2934 
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 2935 &ddsqra->ddsqra_value, tx); 2936 2937 VERIFY0(dsl_prop_get_int_ds(ds, 2938 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 2939 2940 if (ds->ds_quota != newval) { 2941 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2942 ds->ds_quota = newval; 2943 } 2944 dsl_dataset_rele(ds, FTAG); 2945} 2946 2947int 2948dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 2949 uint64_t refquota) 2950{ 2951 dsl_dataset_set_qr_arg_t ddsqra; 2952 2953 ddsqra.ddsqra_name = dsname; 2954 ddsqra.ddsqra_source = source; 2955 ddsqra.ddsqra_value = refquota; 2956 2957 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 2958 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE)); 2959} 2960 2961static int 2962dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 2963{ 2964 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2965 dsl_pool_t *dp = dmu_tx_pool(tx); 2966 dsl_dataset_t *ds; 2967 int error; 2968 uint64_t newval, unique; 2969 2970 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 2971 return (SET_ERROR(ENOTSUP)); 2972 2973 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2974 if (error != 0) 2975 return (error); 2976 2977 if (dsl_dataset_is_snapshot(ds)) { 2978 dsl_dataset_rele(ds, FTAG); 2979 return (SET_ERROR(EINVAL)); 2980 } 2981 2982 error = dsl_prop_predict(ds->ds_dir, 2983 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2984 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2985 if (error != 0) { 2986 dsl_dataset_rele(ds, FTAG); 2987 return (error); 2988 } 2989 2990 /* 2991 * If we are doing the preliminary check in open context, the 2992 * space estimates may be inaccurate. 
2993 */ 2994 if (!dmu_tx_is_syncing(tx)) { 2995 dsl_dataset_rele(ds, FTAG); 2996 return (0); 2997 } 2998 2999 mutex_enter(&ds->ds_lock); 3000 if (!DS_UNIQUE_IS_ACCURATE(ds)) 3001 dsl_dataset_recalc_head_uniq(ds); 3002 unique = ds->ds_phys->ds_unique_bytes; 3003 mutex_exit(&ds->ds_lock); 3004 3005 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 3006 uint64_t delta = MAX(unique, newval) - 3007 MAX(unique, ds->ds_reserved); 3008 3009 if (delta > 3010 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 3011 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 3012 dsl_dataset_rele(ds, FTAG); 3013 return (SET_ERROR(ENOSPC)); 3014 } 3015 } 3016 3017 dsl_dataset_rele(ds, FTAG); 3018 return (0); 3019} 3020 3021void 3022dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 3023 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 3024{ 3025 uint64_t newval; 3026 uint64_t unique; 3027 int64_t delta; 3028 3029 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3030 source, sizeof (value), 1, &value, tx); 3031 3032 VERIFY0(dsl_prop_get_int_ds(ds, 3033 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 3034 3035 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3036 mutex_enter(&ds->ds_dir->dd_lock); 3037 mutex_enter(&ds->ds_lock); 3038 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3039 unique = ds->ds_phys->ds_unique_bytes; 3040 delta = MAX(0, (int64_t)(newval - unique)) - 3041 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3042 ds->ds_reserved = newval; 3043 mutex_exit(&ds->ds_lock); 3044 3045 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3046 mutex_exit(&ds->ds_dir->dd_lock); 3047} 3048 3049static void 3050dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 3051{ 3052 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3053 dsl_pool_t *dp = dmu_tx_pool(tx); 3054 dsl_dataset_t *ds; 3055 3056 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3057 dsl_dataset_set_refreservation_sync_impl(ds, 3058 ddsqra->ddsqra_source, 
ddsqra->ddsqra_value, tx); 3059 dsl_dataset_rele(ds, FTAG); 3060} 3061 3062int 3063dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 3064 uint64_t refreservation) 3065{ 3066 dsl_dataset_set_qr_arg_t ddsqra; 3067 3068 ddsqra.ddsqra_name = dsname; 3069 ddsqra.ddsqra_source = source; 3070 ddsqra.ddsqra_value = refreservation; 3071 3072 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 3073 dsl_dataset_set_refreservation_sync, &ddsqra, 3074 0, ZFS_SPACE_CHECK_NONE)); 3075} 3076 3077/* 3078 * Return (in *usedp) the amount of space written in new that is not 3079 * present in oldsnap. New may be a snapshot or the head. Old must be 3080 * a snapshot before new, in new's filesystem (or its origin). If not then 3081 * fail and return EINVAL. 3082 * 3083 * The written space is calculated by considering two components: First, we 3084 * ignore any freed space, and calculate the written as new's used space 3085 * minus old's used space. Next, we add in the amount of space that was freed 3086 * between the two snapshots, thus reducing new's used space relative to old's. 3087 * Specifically, this is the space that was born before old->ds_creation_txg, 3088 * and freed before new (ie. on new's deadlist or a previous deadlist). 
3089 * 3090 * space freed [---------------------] 3091 * snapshots ---O-------O--------O-------O------ 3092 * oldsnap new 3093 */ 3094int 3095dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 3096 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3097{ 3098 int err = 0; 3099 uint64_t snapobj; 3100 dsl_pool_t *dp = new->ds_dir->dd_pool; 3101 3102 ASSERT(dsl_pool_config_held(dp)); 3103 3104 *usedp = 0; 3105 *usedp += new->ds_phys->ds_referenced_bytes; 3106 *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 3107 3108 *compp = 0; 3109 *compp += new->ds_phys->ds_compressed_bytes; 3110 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 3111 3112 *uncompp = 0; 3113 *uncompp += new->ds_phys->ds_uncompressed_bytes; 3114 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 3115 3116 snapobj = new->ds_object; 3117 while (snapobj != oldsnap->ds_object) { 3118 dsl_dataset_t *snap; 3119 uint64_t used, comp, uncomp; 3120 3121 if (snapobj == new->ds_object) { 3122 snap = new; 3123 } else { 3124 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 3125 if (err != 0) 3126 break; 3127 } 3128 3129 if (snap->ds_phys->ds_prev_snap_txg == 3130 oldsnap->ds_phys->ds_creation_txg) { 3131 /* 3132 * The blocks in the deadlist can not be born after 3133 * ds_prev_snap_txg, so get the whole deadlist space, 3134 * which is more efficient (especially for old-format 3135 * deadlists). Unfortunately the deadlist code 3136 * doesn't have enough information to make this 3137 * optimization itself. 3138 */ 3139 dsl_deadlist_space(&snap->ds_deadlist, 3140 &used, &comp, &uncomp); 3141 } else { 3142 dsl_deadlist_space_range(&snap->ds_deadlist, 3143 0, oldsnap->ds_phys->ds_creation_txg, 3144 &used, &comp, &uncomp); 3145 } 3146 *usedp += used; 3147 *compp += comp; 3148 *uncompp += uncomp; 3149 3150 /* 3151 * If we get to the beginning of the chain of snapshots 3152 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 3153 * was not a snapshot of/before new. 
3154 */ 3155 snapobj = snap->ds_phys->ds_prev_snap_obj; 3156 if (snap != new) 3157 dsl_dataset_rele(snap, FTAG); 3158 if (snapobj == 0) { 3159 err = SET_ERROR(EINVAL); 3160 break; 3161 } 3162 3163 } 3164 return (err); 3165} 3166 3167/* 3168 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 3169 * lastsnap, and all snapshots in between are deleted. 3170 * 3171 * blocks that would be freed [---------------------------] 3172 * snapshots ---O-------O--------O-------O--------O 3173 * firstsnap lastsnap 3174 * 3175 * This is the set of blocks that were born after the snap before firstsnap, 3176 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 3177 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 3178 * We calculate this by iterating over the relevant deadlists (from the snap 3179 * after lastsnap, backward to the snap after firstsnap), summing up the 3180 * space on the deadlist that was born after the snap before firstsnap. 3181 */ 3182int 3183dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 3184 dsl_dataset_t *lastsnap, 3185 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3186{ 3187 int err = 0; 3188 uint64_t snapobj; 3189 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3190 3191 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 3192 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 3193 3194 /* 3195 * Check that the snapshots are in the same dsl_dir, and firstsnap 3196 * is before lastsnap. 
3197 */ 3198 if (firstsnap->ds_dir != lastsnap->ds_dir || 3199 firstsnap->ds_phys->ds_creation_txg > 3200 lastsnap->ds_phys->ds_creation_txg) 3201 return (SET_ERROR(EINVAL)); 3202 3203 *usedp = *compp = *uncompp = 0; 3204 3205 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 3206 while (snapobj != firstsnap->ds_object) { 3207 dsl_dataset_t *ds; 3208 uint64_t used, comp, uncomp; 3209 3210 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3211 if (err != 0) 3212 break; 3213 3214 dsl_deadlist_space_range(&ds->ds_deadlist, 3215 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 3216 &used, &comp, &uncomp); 3217 *usedp += used; 3218 *compp += comp; 3219 *uncompp += uncomp; 3220 3221 snapobj = ds->ds_phys->ds_prev_snap_obj; 3222 ASSERT3U(snapobj, !=, 0); 3223 dsl_dataset_rele(ds, FTAG); 3224 } 3225 return (err); 3226} 3227 3228/* 3229 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3230 * For example, they could both be snapshots of the same filesystem, and 3231 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3232 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3233 * filesystem. Or 'earlier' could be the origin's origin. 3234 * 3235 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 
3236 */ 3237boolean_t 3238dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 3239 uint64_t earlier_txg) 3240{ 3241 dsl_pool_t *dp = later->ds_dir->dd_pool; 3242 int error; 3243 boolean_t ret; 3244 3245 ASSERT(dsl_pool_config_held(dp)); 3246 ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); 3247 3248 if (earlier_txg == 0) 3249 earlier_txg = earlier->ds_phys->ds_creation_txg; 3250 3251 if (dsl_dataset_is_snapshot(later) && 3252 earlier_txg >= later->ds_phys->ds_creation_txg) 3253 return (B_FALSE); 3254 3255 if (later->ds_dir == earlier->ds_dir) 3256 return (B_TRUE); 3257 if (!dsl_dir_is_clone(later->ds_dir)) 3258 return (B_FALSE); 3259 3260 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) 3261 return (B_TRUE); 3262 dsl_dataset_t *origin; 3263 error = dsl_dataset_hold_obj(dp, 3264 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); 3265 if (error != 0) 3266 return (B_FALSE); 3267 ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 3268 dsl_dataset_rele(origin, FTAG); 3269 return (ret); 3270} 3271 3272 3273void 3274dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 3275{ 3276 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3277 dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 3278} 3279