dsl_dataset.c revision 332547
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org> 25 * Copyright (c) 2011, 2017 by Delphix. All rights reserved. 26 * Copyright (c) 2014, Joyent, Inc. All rights reserved. 27 * Copyright (c) 2014 RackTop Systems. 28 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 29 * Copyright (c) 2014 Integros [integros.com] 30 * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved. 31 * Copyright 2017 Nexenta Systems, Inc. 32 */ 33 34#include <sys/dmu_objset.h> 35#include <sys/dsl_dataset.h> 36#include <sys/dsl_dir.h> 37#include <sys/dsl_prop.h> 38#include <sys/dsl_synctask.h> 39#include <sys/dmu_traverse.h> 40#include <sys/dmu_impl.h> 41#include <sys/dmu_send.h> 42#include <sys/dmu_tx.h> 43#include <sys/arc.h> 44#include <sys/zio.h> 45#include <sys/zap.h> 46#include <sys/zfeature.h> 47#include <sys/unique.h> 48#include <sys/zfs_context.h> 49#include <sys/zfs_ioctl.h> 50#include <sys/spa.h> 51#include <sys/spa_impl.h> 52#include <sys/vdev.h> 53#include <sys/zfs_znode.h> 54#include <sys/zfs_onexit.h> 55#include <sys/zvol.h> 56#include <sys/dsl_scan.h> 57#include <sys/dsl_deadlist.h> 58#include <sys/dsl_destroy.h> 59#include <sys/dsl_userhold.h> 60#include <sys/dsl_bookmark.h> 61#include <sys/dmu_send.h> 62#include <sys/zio_checksum.h> 63#include <sys/zio_compress.h> 64#include <zfs_fletcher.h> 65 66SYSCTL_DECL(_vfs_zfs); 67 68/* 69 * The SPA supports block sizes up to 16MB. However, very large blocks 70 * can have an impact on i/o latency (e.g. tying up a spinning disk for 71 * ~300ms), and also potentially on the memory allocator. Therefore, 72 * we do not allow the recordsize to be set larger than zfs_max_recordsize 73 * (default 1MB). Larger blocks can be created by changing this tunable, 74 * and pools with larger blocks can always be imported and used, regardless 75 * of this setting. 76 */ 77int zfs_max_recordsize = 1 * 1024 * 1024; 78SYSCTL_INT(_vfs_zfs, OID_AUTO, max_recordsize, CTLFLAG_RWTUN, 79 &zfs_max_recordsize, 0, 80 "Maximum block size. Expect dragons when tuning this."); 81 82#define SWITCH64(x, y) \ 83 { \ 84 uint64_t __tmp = (x); \ 85 (x) = (y); \ 86 (y) = __tmp; \ 87 } 88 89#define DS_REF_MAX (1ULL << 62) 90 91extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); 92 93static void dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds, 94 uint64_t obj, dmu_tx_t *tx); 95static void dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds, 96 dmu_tx_t *tx); 97 98extern int spa_asize_inflation; 99 100static zil_header_t zero_zil; 101 102/* 103 * Figure out how much of this delta should be propogated to the dsl_dir 104 * layer. If there's a refreservation, that space has already been 105 * partially accounted for in our ancestors. 106 */ 107static int64_t 108parent_delta(dsl_dataset_t *ds, int64_t delta) 109{ 110 dsl_dataset_phys_t *ds_phys; 111 uint64_t old_bytes, new_bytes; 112 113 if (ds->ds_reserved == 0) 114 return (delta); 115 116 ds_phys = dsl_dataset_phys(ds); 117 old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved); 118 new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 119 120 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 121 return (new_bytes - old_bytes); 122} 123 124void 125dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 126{ 127 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 128 int compressed = BP_GET_PSIZE(bp); 129 int uncompressed = BP_GET_UCSIZE(bp); 130 int64_t delta; 131 132 dprintf_bp(bp, "ds=%p", ds); 133 134 ASSERT(dmu_tx_is_syncing(tx)); 135 /* It could have been compressed away to nothing */ 136 if (BP_IS_HOLE(bp)) 137 return; 138 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 139 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 140 if (ds == NULL) { 141 dsl_pool_mos_diduse_space(tx->tx_pool, 142 used, compressed, uncompressed); 143 return; 144 } 145 146 ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); 147 dmu_buf_will_dirty(ds->ds_dbuf, tx); 148 mutex_enter(&ds->ds_lock); 149 delta = parent_delta(ds, used); 150 dsl_dataset_phys(ds)->ds_referenced_bytes += used; 151 dsl_dataset_phys(ds)->ds_compressed_bytes += compressed; 152 dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed; 153 dsl_dataset_phys(ds)->ds_unique_bytes += used; 154 155 if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) { 156 ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] = 157 B_TRUE; 158 } 159 160 spa_feature_t f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp)); 161 if (f != SPA_FEATURE_NONE) 162 ds->ds_feature_activation_needed[f] = B_TRUE; 163 164 mutex_exit(&ds->ds_lock); 165 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 166 compressed, uncompressed, tx); 167 dsl_dir_transfer_space(ds->ds_dir, used - delta, 168 DD_USED_REFRSRV, DD_USED_HEAD, NULL); 169} 170 171/* 172 * Called when the specified segment has been remapped, and is thus no 173 * longer referenced in the head dataset. The vdev must be indirect. 174 * 175 * If the segment is referenced by a snapshot, put it on the remap deadlist. 176 * Otherwise, add this segment to the obsolete spacemap. 177 */ 178void 179dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev, uint64_t offset, 180 uint64_t size, uint64_t birth, dmu_tx_t *tx) 181{ 182 spa_t *spa = ds->ds_dir->dd_pool->dp_spa; 183 184 ASSERT(dmu_tx_is_syncing(tx)); 185 ASSERT(birth <= tx->tx_txg); 186 ASSERT(!ds->ds_is_snapshot); 187 188 if (birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 189 spa_vdev_indirect_mark_obsolete(spa, vdev, offset, size, tx); 190 } else { 191 blkptr_t fakebp; 192 dva_t *dva = &fakebp.blk_dva[0]; 193 194 ASSERT(ds != NULL); 195 196 mutex_enter(&ds->ds_remap_deadlist_lock); 197 if (!dsl_dataset_remap_deadlist_exists(ds)) { 198 dsl_dataset_create_remap_deadlist(ds, tx); 199 } 200 mutex_exit(&ds->ds_remap_deadlist_lock); 201 202 BP_ZERO(&fakebp); 203 fakebp.blk_birth = birth; 204 DVA_SET_VDEV(dva, vdev); 205 DVA_SET_OFFSET(dva, offset); 206 DVA_SET_ASIZE(dva, size); 207 208 dsl_deadlist_insert(&ds->ds_remap_deadlist, &fakebp, tx); 209 } 210} 211 212int 213dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 214 boolean_t async) 215{ 216 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 217 218 int used = bp_get_dsize_sync(spa, bp); 219 int compressed = BP_GET_PSIZE(bp); 220 int uncompressed = BP_GET_UCSIZE(bp); 221 222 if (BP_IS_HOLE(bp)) 223 return (0); 224 225 ASSERT(dmu_tx_is_syncing(tx)); 226 ASSERT(bp->blk_birth <= tx->tx_txg); 227 228 if (ds == NULL) { 229 dsl_free(tx->tx_pool, tx->tx_txg, bp); 230 dsl_pool_mos_diduse_space(tx->tx_pool, 231 -used, -compressed, -uncompressed); 232 return (used); 233 } 234 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 235 236 ASSERT(!ds->ds_is_snapshot); 237 dmu_buf_will_dirty(ds->ds_dbuf, tx); 238 239 if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 240 int64_t delta; 241 242 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 243 dsl_free(tx->tx_pool, tx->tx_txg, bp); 244 245 mutex_enter(&ds->ds_lock); 246 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used || 247 !DS_UNIQUE_IS_ACCURATE(ds)); 248 delta = parent_delta(ds, -used); 249 dsl_dataset_phys(ds)->ds_unique_bytes -= used; 250 mutex_exit(&ds->ds_lock); 251 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 252 delta, -compressed, -uncompressed, tx); 253 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 254 DD_USED_REFRSRV, DD_USED_HEAD, NULL); 255 } else { 256 dprintf_bp(bp, "putting on dead list: %s", ""); 257 if (async) { 258 /* 259 * We are here as part of zio's write done callback, 260 * which means we're a zio interrupt thread. We can't 261 * call dsl_deadlist_insert() now because it may block 262 * waiting for I/O. Instead, put bp on the deferred 263 * queue and let dsl_pool_sync() finish the job. 264 */ 265 bplist_append(&ds->ds_pending_deadlist, bp); 266 } else { 267 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 268 } 269 ASSERT3U(ds->ds_prev->ds_object, ==, 270 dsl_dataset_phys(ds)->ds_prev_snap_obj); 271 ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); 272 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 273 if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 274 ds->ds_object && bp->blk_birth > 275 dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { 276 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 277 mutex_enter(&ds->ds_prev->ds_lock); 278 dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; 279 mutex_exit(&ds->ds_prev->ds_lock); 280 } 281 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 282 dsl_dir_transfer_space(ds->ds_dir, used, 283 DD_USED_HEAD, DD_USED_SNAP, tx); 284 } 285 } 286 mutex_enter(&ds->ds_lock); 287 ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used); 288 dsl_dataset_phys(ds)->ds_referenced_bytes -= used; 289 ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed); 290 dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed; 291 ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed); 292 dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed; 293 mutex_exit(&ds->ds_lock); 294 295 return (used); 296} 297 298/* 299 * We have to release the fsid syncronously or we risk that a subsequent 300 * mount of the same dataset will fail to unique_insert the fsid. This 301 * failure would manifest itself as the fsid of this dataset changing 302 * between mounts which makes NFS clients quite unhappy. 303 */ 304static void 305dsl_dataset_evict_sync(void *dbu) 306{ 307 dsl_dataset_t *ds = dbu; 308 309 ASSERT(ds->ds_owner == NULL); 310 311 unique_remove(ds->ds_fsid_guid); 312} 313 314static void 315dsl_dataset_evict_async(void *dbu) 316{ 317 dsl_dataset_t *ds = dbu; 318 319 ASSERT(ds->ds_owner == NULL); 320 321 ds->ds_dbuf = NULL; 322 323 if (ds->ds_objset != NULL) 324 dmu_objset_evict(ds->ds_objset); 325 326 if (ds->ds_prev) { 327 dsl_dataset_rele(ds->ds_prev, ds); 328 ds->ds_prev = NULL; 329 } 330 331 bplist_destroy(&ds->ds_pending_deadlist); 332 if (dsl_deadlist_is_open(&ds->ds_deadlist)) 333 dsl_deadlist_close(&ds->ds_deadlist); 334 if (dsl_deadlist_is_open(&ds->ds_remap_deadlist)) 335 dsl_deadlist_close(&ds->ds_remap_deadlist); 336 if (ds->ds_dir) 337 dsl_dir_async_rele(ds->ds_dir, ds); 338 339 ASSERT(!list_link_active(&ds->ds_synced_link)); 340 341 list_destroy(&ds->ds_prop_cbs); 342 if (mutex_owned(&ds->ds_lock)) 343 mutex_exit(&ds->ds_lock); 344 mutex_destroy(&ds->ds_lock); 345 if (mutex_owned(&ds->ds_opening_lock)) 346 mutex_exit(&ds->ds_opening_lock); 347 mutex_destroy(&ds->ds_opening_lock); 348 mutex_destroy(&ds->ds_sendstream_lock); 349 mutex_destroy(&ds->ds_remap_deadlist_lock); 350 refcount_destroy(&ds->ds_longholds); 351 rrw_destroy(&ds->ds_bp_rwlock); 352 353 kmem_free(ds, sizeof (dsl_dataset_t)); 354} 355 356int 357dsl_dataset_get_snapname(dsl_dataset_t *ds) 358{ 359 dsl_dataset_phys_t *headphys; 360 int err; 361 dmu_buf_t *headdbuf; 362 dsl_pool_t *dp = ds->ds_dir->dd_pool; 363 objset_t *mos = dp->dp_meta_objset; 364 365 if (ds->ds_snapname[0]) 366 return (0); 367 if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) 368 return (0); 369 370 err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, 371 FTAG, &headdbuf); 372 if (err != 0) 373 return (err); 374 headphys = headdbuf->db_data; 375 err = zap_value_search(dp->dp_meta_objset, 376 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 377 dmu_buf_rele(headdbuf, FTAG); 378 return (err); 379} 380 381int 382dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 383{ 384 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 385 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 386 matchtype_t mt = 0; 387 int err; 388 389 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 390 mt = MT_NORMALIZE; 391 392 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 393 value, mt, NULL, 0, NULL); 394 if (err == ENOTSUP && (mt & MT_NORMALIZE)) 395 err = zap_lookup(mos, snapobj, name, 8, 1, value); 396 return (err); 397} 398 399int 400dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, 401 boolean_t adj_cnt) 402{ 403 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 404 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 405 matchtype_t mt = 0; 406 int err; 407 408 dsl_dir_snap_cmtime_update(ds->ds_dir); 409 410 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 411 mt = MT_NORMALIZE; 412 413 err = zap_remove_norm(mos, snapobj, name, mt, tx); 414 if (err == ENOTSUP && (mt & MT_NORMALIZE)) 415 err = zap_remove(mos, snapobj, name, tx); 416 417 if (err == 0 && adj_cnt) 418 dsl_fs_ss_count_adjust(ds->ds_dir, -1, 419 DD_FIELD_SNAPSHOT_COUNT, tx); 420 421 return (err); 422} 423 424boolean_t 425dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag) 426{ 427 dmu_buf_t *dbuf = ds->ds_dbuf; 428 boolean_t result = B_FALSE; 429 430 if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset, 431 ds->ds_object, DMU_BONUS_BLKID, tag)) { 432 433 if (ds == dmu_buf_get_user(dbuf)) 434 result = B_TRUE; 435 else 436 dmu_buf_rele(dbuf, tag); 437 } 438 439 return (result); 440} 441 442int 443dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 444 dsl_dataset_t **dsp) 445{ 446 objset_t *mos = dp->dp_meta_objset; 447 dmu_buf_t *dbuf; 448 dsl_dataset_t *ds; 449 int err; 450 dmu_object_info_t doi; 451 452 ASSERT(dsl_pool_config_held(dp)); 453 454 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 455 if (err != 0) 456 return (err); 457 458 /* Make sure dsobj has the correct object type. */ 459 dmu_object_info_from_db(dbuf, &doi); 460 if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) { 461 dmu_buf_rele(dbuf, tag); 462 return (SET_ERROR(EINVAL)); 463 } 464 465 ds = dmu_buf_get_user(dbuf); 466 if (ds == NULL) { 467 dsl_dataset_t *winner = NULL; 468 469 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 470 ds->ds_dbuf = dbuf; 471 ds->ds_object = dsobj; 472 ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0; 473 474 err = dsl_dir_hold_obj(dp, dsl_dataset_phys(ds)->ds_dir_obj, 475 NULL, ds, &ds->ds_dir); 476 if (err != 0) { 477 kmem_free(ds, sizeof (dsl_dataset_t)); 478 dmu_buf_rele(dbuf, tag); 479 return (err); 480 } 481 482 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 483 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 484 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 485 mutex_init(&ds->ds_remap_deadlist_lock, 486 NULL, MUTEX_DEFAULT, NULL); 487 rrw_init(&ds->ds_bp_rwlock, B_FALSE); 488 refcount_create(&ds->ds_longholds); 489 490 bplist_create(&ds->ds_pending_deadlist); 491 492 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 493 offsetof(dmu_sendarg_t, dsa_link)); 494 495 list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t), 496 offsetof(dsl_prop_cb_record_t, cbr_ds_node)); 497 498 if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 499 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 500 if (!(spa_feature_table[f].fi_flags & 501 ZFEATURE_FLAG_PER_DATASET)) 502 continue; 503 err = zap_contains(mos, dsobj, 504 spa_feature_table[f].fi_guid); 505 if (err == 0) { 506 ds->ds_feature_inuse[f] = B_TRUE; 507 } else { 508 ASSERT3U(err, ==, ENOENT); 509 err = 0; 510 } 511 } 512 } 513 514 if (!ds->ds_is_snapshot) { 515 ds->ds_snapname[0] = '\0'; 516 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 517 err = dsl_dataset_hold_obj(dp, 518 dsl_dataset_phys(ds)->ds_prev_snap_obj, 519 ds, &ds->ds_prev); 520 } 521 if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 522 int zaperr = zap_lookup(mos, ds->ds_object, 523 DS_FIELD_BOOKMARK_NAMES, 524 sizeof (ds->ds_bookmarks), 1, 525 &ds->ds_bookmarks); 526 if (zaperr != ENOENT) 527 VERIFY0(zaperr); 528 } 529 } else { 530 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 531 err = dsl_dataset_get_snapname(ds); 532 if (err == 0 && 533 dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { 534 err = zap_count( 535 ds->ds_dir->dd_pool->dp_meta_objset, 536 dsl_dataset_phys(ds)->ds_userrefs_obj, 537 &ds->ds_userrefs); 538 } 539 } 540 541 if (err == 0 && !ds->ds_is_snapshot) { 542 err = dsl_prop_get_int_ds(ds, 543 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 544 &ds->ds_reserved); 545 if (err == 0) { 546 err = dsl_prop_get_int_ds(ds, 547 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 548 &ds->ds_quota); 549 } 550 } else { 551 ds->ds_reserved = ds->ds_quota = 0; 552 } 553 554 dsl_deadlist_open(&ds->ds_deadlist, 555 mos, dsl_dataset_phys(ds)->ds_deadlist_obj); 556 uint64_t remap_deadlist_obj = 557 dsl_dataset_get_remap_deadlist_object(ds); 558 if (remap_deadlist_obj != 0) { 559 dsl_deadlist_open(&ds->ds_remap_deadlist, mos, 560 remap_deadlist_obj); 561 } 562 563 dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync, 564 dsl_dataset_evict_async, &ds->ds_dbuf); 565 if (err == 0) 566 winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu); 567 568 if (err != 0 || winner != NULL) { 569 bplist_destroy(&ds->ds_pending_deadlist); 570 dsl_deadlist_close(&ds->ds_deadlist); 571 if (dsl_deadlist_is_open(&ds->ds_remap_deadlist)) 572 dsl_deadlist_close(&ds->ds_remap_deadlist); 573 if (ds->ds_prev) 574 dsl_dataset_rele(ds->ds_prev, ds); 575 dsl_dir_rele(ds->ds_dir, ds); 576 mutex_destroy(&ds->ds_lock); 577 mutex_destroy(&ds->ds_opening_lock); 578 mutex_destroy(&ds->ds_sendstream_lock); 579 refcount_destroy(&ds->ds_longholds); 580 kmem_free(ds, sizeof (dsl_dataset_t)); 581 if (err != 0) { 582 dmu_buf_rele(dbuf, tag); 583 return (err); 584 } 585 ds = winner; 586 } else { 587 ds->ds_fsid_guid = 588 unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid); 589 if (ds->ds_fsid_guid != 590 dsl_dataset_phys(ds)->ds_fsid_guid) { 591 zfs_dbgmsg("ds_fsid_guid changed from " 592 "%llx to %llx for pool %s dataset id %llu", 593 (long long) 594 dsl_dataset_phys(ds)->ds_fsid_guid, 595 (long long)ds->ds_fsid_guid, 596 spa_name(dp->dp_spa), 597 dsobj); 598 } 599 } 600 } 601 ASSERT3P(ds->ds_dbuf, ==, dbuf); 602 ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data); 603 ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 || 604 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 605 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 606 *dsp = ds; 607 return (0); 608} 609 610int 611dsl_dataset_hold(dsl_pool_t *dp, const char *name, 612 void *tag, dsl_dataset_t **dsp) 613{ 614 dsl_dir_t *dd; 615 const char *snapname; 616 uint64_t obj; 617 int err = 0; 618 dsl_dataset_t *ds; 619 620 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 621 if (err != 0) 622 return (err); 623 624 ASSERT(dsl_pool_config_held(dp)); 625 obj = dsl_dir_phys(dd)->dd_head_dataset_obj; 626 if (obj != 0) 627 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 628 else 629 err = SET_ERROR(ENOENT); 630 631 /* we may be looking for a snapshot */ 632 if (err == 0 && snapname != NULL) { 633 dsl_dataset_t *snap_ds; 634 635 if (*snapname++ != '@') { 636 dsl_dataset_rele(ds, tag); 637 dsl_dir_rele(dd, FTAG); 638 return (SET_ERROR(ENOENT)); 639 } 640 641 dprintf("looking for snapshot '%s'\n", snapname); 642 err = dsl_dataset_snap_lookup(ds, snapname, &obj); 643 if (err == 0) 644 err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds); 645 dsl_dataset_rele(ds, tag); 646 647 if (err == 0) { 648 mutex_enter(&snap_ds->ds_lock); 649 if (snap_ds->ds_snapname[0] == 0) 650 (void) strlcpy(snap_ds->ds_snapname, snapname, 651 sizeof (snap_ds->ds_snapname)); 652 mutex_exit(&snap_ds->ds_lock); 653 ds = snap_ds; 654 } 655 } 656 if (err == 0) 657 *dsp = ds; 658 dsl_dir_rele(dd, FTAG); 659 return (err); 660} 661 662int 663dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 664 void *tag, dsl_dataset_t **dsp) 665{ 666 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 667 if (err != 0) 668 return (err); 669 if (!dsl_dataset_tryown(*dsp, tag)) { 670 dsl_dataset_rele(*dsp, tag); 671 *dsp = NULL; 672 return (SET_ERROR(EBUSY)); 673 } 674 return (0); 675} 676 677int 678dsl_dataset_own(dsl_pool_t *dp, const char *name, 679 void *tag, dsl_dataset_t **dsp) 680{ 681 int err = dsl_dataset_hold(dp, name, tag, dsp); 682 if (err != 0) 683 return (err); 684 if (!dsl_dataset_tryown(*dsp, tag)) { 685 dsl_dataset_rele(*dsp, tag); 686 return (SET_ERROR(EBUSY)); 687 } 688 return (0); 689} 690 691/* 692 * See the comment above dsl_pool_hold() for details. In summary, a long 693 * hold is used to prevent destruction of a dataset while the pool hold 694 * is dropped, allowing other concurrent operations (e.g. spa_sync()). 695 * 696 * The dataset and pool must be held when this function is called. After it 697 * is called, the pool hold may be released while the dataset is still held 698 * and accessed. 699 */ 700void 701dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 702{ 703 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 704 (void) refcount_add(&ds->ds_longholds, tag); 705} 706 707void 708dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 709{ 710 (void) refcount_remove(&ds->ds_longholds, tag); 711} 712 713/* Return B_TRUE if there are any long holds on this dataset. */ 714boolean_t 715dsl_dataset_long_held(dsl_dataset_t *ds) 716{ 717 return (!refcount_is_zero(&ds->ds_longholds)); 718} 719 720void 721dsl_dataset_name(dsl_dataset_t *ds, char *name) 722{ 723 if (ds == NULL) { 724 (void) strcpy(name, "mos"); 725 } else { 726 dsl_dir_name(ds->ds_dir, name); 727 VERIFY0(dsl_dataset_get_snapname(ds)); 728 if (ds->ds_snapname[0]) { 729 VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN), 730 <, ZFS_MAX_DATASET_NAME_LEN); 731 /* 732 * We use a "recursive" mutex so that we 733 * can call dprintf_ds() with ds_lock held. 734 */ 735 if (!MUTEX_HELD(&ds->ds_lock)) { 736 mutex_enter(&ds->ds_lock); 737 VERIFY3U(strlcat(name, ds->ds_snapname, 738 ZFS_MAX_DATASET_NAME_LEN), <, 739 ZFS_MAX_DATASET_NAME_LEN); 740 mutex_exit(&ds->ds_lock); 741 } else { 742 VERIFY3U(strlcat(name, ds->ds_snapname, 743 ZFS_MAX_DATASET_NAME_LEN), <, 744 ZFS_MAX_DATASET_NAME_LEN); 745 } 746 } 747 } 748} 749 750int 751dsl_dataset_namelen(dsl_dataset_t *ds) 752{ 753 VERIFY0(dsl_dataset_get_snapname(ds)); 754 mutex_enter(&ds->ds_lock); 755 int len = dsl_dir_namelen(ds->ds_dir) + 1 + strlen(ds->ds_snapname); 756 mutex_exit(&ds->ds_lock); 757 return (len); 758} 759 760void 761dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 762{ 763 dmu_buf_rele(ds->ds_dbuf, tag); 764} 765 766void 767dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 768{ 769 ASSERT3P(ds->ds_owner, ==, tag); 770 ASSERT(ds->ds_dbuf != NULL); 771 772 mutex_enter(&ds->ds_lock); 773 ds->ds_owner = NULL; 774 mutex_exit(&ds->ds_lock); 775 dsl_dataset_long_rele(ds, tag); 776 dsl_dataset_rele(ds, tag); 777} 778 779boolean_t 780dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 781{ 782 boolean_t gotit = FALSE; 783 784 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 785 mutex_enter(&ds->ds_lock); 786 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 787 ds->ds_owner = tag; 788 dsl_dataset_long_hold(ds, tag); 789 gotit = TRUE; 790 } 791 mutex_exit(&ds->ds_lock); 792 return (gotit); 793} 794 795boolean_t 796dsl_dataset_has_owner(dsl_dataset_t *ds) 797{ 798 boolean_t rv; 799 mutex_enter(&ds->ds_lock); 800 rv = (ds->ds_owner != NULL); 801 mutex_exit(&ds->ds_lock); 802 return (rv); 803} 804 805static void 806dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) 807{ 808 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 809 objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; 810 uint64_t zero = 0; 811 812 VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); 813 814 spa_feature_incr(spa, f, tx); 815 dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); 816 817 VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid, 818 sizeof (zero), 1, &zero, tx)); 819} 820 821void 822dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) 823{ 824 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 825 objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; 826 827 VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); 828 829 VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx)); 830 spa_feature_decr(spa, f, tx); 831} 832 833uint64_t 834dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 835 uint64_t flags, dmu_tx_t *tx) 836{ 837 dsl_pool_t *dp = dd->dd_pool; 838 dmu_buf_t *dbuf; 839 dsl_dataset_phys_t *dsphys; 840 uint64_t dsobj; 841 objset_t *mos = dp->dp_meta_objset; 842 843 if (origin == NULL) 844 origin = dp->dp_origin_snap; 845 846 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 847 ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0); 848 ASSERT(dmu_tx_is_syncing(tx)); 849 ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0); 850 851 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 852 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 853 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 854 dmu_buf_will_dirty(dbuf, tx); 855 dsphys = dbuf->db_data; 856 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 857 dsphys->ds_dir_obj = dd->dd_object; 858 dsphys->ds_flags = flags; 859 dsphys->ds_fsid_guid = unique_create(); 860 do { 861 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 862 sizeof (dsphys->ds_guid)); 863 } while (dsphys->ds_guid == 0); 864 dsphys->ds_snapnames_zapobj = 865 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 866 DMU_OT_NONE, 0, tx); 867 dsphys->ds_creation_time = gethrestime_sec(); 868 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 869 870 if (origin == NULL) { 871 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 872 } else { 873 dsl_dataset_t *ohds; /* head of the origin snapshot */ 874 875 dsphys->ds_prev_snap_obj = origin->ds_object; 876 dsphys->ds_prev_snap_txg = 877 dsl_dataset_phys(origin)->ds_creation_txg; 878 dsphys->ds_referenced_bytes = 879 dsl_dataset_phys(origin)->ds_referenced_bytes; 880 dsphys->ds_compressed_bytes = 881 dsl_dataset_phys(origin)->ds_compressed_bytes; 882 dsphys->ds_uncompressed_bytes = 883 dsl_dataset_phys(origin)->ds_uncompressed_bytes; 884 rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG); 885 dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; 886 rrw_exit(&origin->ds_bp_rwlock, FTAG); 887 888 /* 889 * Inherit flags that describe the dataset's contents 890 * (INCONSISTENT) or properties (Case Insensitive). 891 */ 892 dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags & 893 (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); 894 895 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 896 if (origin->ds_feature_inuse[f]) 897 dsl_dataset_activate_feature(dsobj, f, tx); 898 } 899 900 dmu_buf_will_dirty(origin->ds_dbuf, tx); 901 dsl_dataset_phys(origin)->ds_num_children++; 902 903 VERIFY0(dsl_dataset_hold_obj(dp, 904 dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj, 905 FTAG, &ohds)); 906 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 907 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 908 dsl_dataset_rele(ohds, FTAG); 909 910 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 911 if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) { 912 dsl_dataset_phys(origin)->ds_next_clones_obj = 913 zap_create(mos, 914 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 915 } 916 VERIFY0(zap_add_int(mos, 917 dsl_dataset_phys(origin)->ds_next_clones_obj, 918 dsobj, tx)); 919 } 920 921 dmu_buf_will_dirty(dd->dd_dbuf, tx); 922 dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object; 923 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 924 if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { 925 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 926 dsl_dir_phys(origin->ds_dir)->dd_clones = 927 zap_create(mos, 928 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 929 } 930 VERIFY0(zap_add_int(mos, 931 dsl_dir_phys(origin->ds_dir)->dd_clones, 932 dsobj, tx)); 933 } 934 } 935 936 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 937 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 938 939 dmu_buf_rele(dbuf, FTAG); 940 941 dmu_buf_will_dirty(dd->dd_dbuf, tx); 942 dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj; 943 944 return (dsobj); 945} 946 947static void 948dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 949{ 950 objset_t *os; 951 952 VERIFY0(dmu_objset_from_ds(ds, &os)); 953 if (bcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) { 954 dsl_pool_t *dp = ds->ds_dir->dd_pool; 955 zio_t *zio; 956 957 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 958 959 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 960 dsl_dataset_sync(ds, zio, tx); 961 VERIFY0(zio_wait(zio)); 962 963 /* dsl_dataset_sync_done will drop this reference. */ 964 dmu_buf_add_ref(ds->ds_dbuf, ds); 965 dsl_dataset_sync_done(ds, tx); 966 } 967} 968 969uint64_t 970dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 971 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 972{ 973 dsl_pool_t *dp = pdd->dd_pool; 974 uint64_t dsobj, ddobj; 975 dsl_dir_t *dd; 976 977 ASSERT(dmu_tx_is_syncing(tx)); 978 ASSERT(lastname[0] != '@'); 979 980 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 981 VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 982 983 dsobj = dsl_dataset_create_sync_dd(dd, origin, 984 flags & ~DS_CREATE_FLAG_NODIRTY, tx); 985 986 dsl_deleg_set_create_perms(dd, tx, cr); 987 988 /* 989 * Since we're creating a new node we know it's a leaf, so we can 990 * initialize the counts if the limit feature is active. 991 */ 992 if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 993 uint64_t cnt = 0; 994 objset_t *os = dd->dd_pool->dp_meta_objset; 995 996 dsl_dir_zapify(dd, tx); 997 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 998 sizeof (cnt), 1, &cnt, tx)); 999 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 1000 sizeof (cnt), 1, &cnt, tx)); 1001 } 1002 1003 dsl_dir_rele(dd, FTAG); 1004 1005 /* 1006 * If we are creating a clone, make sure we zero out any stale 1007 * data from the origin snapshots zil header. 1008 */ 1009 if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 1010 dsl_dataset_t *ds; 1011 1012 VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 1013 dsl_dataset_zero_zil(ds, tx); 1014 dsl_dataset_rele(ds, FTAG); 1015 } 1016 1017 return (dsobj); 1018} 1019 1020#ifdef __FreeBSD__ 1021/* FreeBSD ioctl compat begin */ 1022struct destroyarg { 1023 nvlist_t *nvl; 1024 const char *snapname; 1025}; 1026 1027static int 1028dsl_check_snap_cb(const char *name, void *arg) 1029{ 1030 struct destroyarg *da = arg; 1031 dsl_dataset_t *ds; 1032 char *dsname; 1033 1034 dsname = kmem_asprintf("%s@%s", name, da->snapname); 1035 fnvlist_add_boolean(da->nvl, dsname); 1036 kmem_free(dsname, strlen(dsname) + 1); 1037 1038 return (0); 1039} 1040 1041int 1042dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, 1043 nvlist_t *snaps) 1044{ 1045 struct destroyarg *da; 1046 int err; 1047 1048 da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); 1049 da->nvl = snaps; 1050 da->snapname = snapname; 1051 err = dmu_objset_find(fsname, dsl_check_snap_cb, da, 1052 DS_FIND_CHILDREN); 1053 kmem_free(da, sizeof (struct destroyarg)); 1054 1055 return (err); 1056} 1057/* FreeBSD ioctl compat end */ 1058#endif /* __FreeBSD__ */ 1059 1060/* 1061 * The unique space in the head dataset can be calculated by subtracting 1062 * the space used in the most recent snapshot, that is still being used 1063 * in this file system, from the space currently in use. To figure out 1064 * the space in the most recent snapshot still in use, we need to take 1065 * the total space used in the snapshot and subtract out the space that 1066 * has been freed up since the snapshot was taken. 1067 */ 1068void 1069dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1070{ 1071 uint64_t mrs_used; 1072 uint64_t dlused, dlcomp, dluncomp; 1073 1074 ASSERT(!ds->ds_is_snapshot); 1075 1076 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) 1077 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; 1078 else 1079 mrs_used = 0; 1080 1081 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 1082 1083 ASSERT3U(dlused, <=, mrs_used); 1084 dsl_dataset_phys(ds)->ds_unique_bytes = 1085 dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused); 1086 1087 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1088 SPA_VERSION_UNIQUE_ACCURATE) 1089 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1090} 1091 1092void 1093dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, 1094 dmu_tx_t *tx) 1095{ 1096 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1097 uint64_t count; 1098 int err; 1099 1100 ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2); 1101 err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1102 obj, tx); 1103 /* 1104 * The err should not be ENOENT, but a bug in a previous version 1105 * of the code could cause upgrade_clones_cb() to not set 1106 * ds_next_snap_obj when it should, leading to a missing entry. 1107 * If we knew that the pool was created after 1108 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 1109 * ENOENT. However, at least we can check that we don't have 1110 * too many entries in the next_clones_obj even after failing to 1111 * remove this one. 1112 */ 1113 if (err != ENOENT) 1114 VERIFY0(err); 1115 ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1116 &count)); 1117 ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2); 1118} 1119 1120 1121blkptr_t * 1122dsl_dataset_get_blkptr(dsl_dataset_t *ds) 1123{ 1124 return (&dsl_dataset_phys(ds)->ds_bp); 1125} 1126 1127spa_t * 1128dsl_dataset_get_spa(dsl_dataset_t *ds) 1129{ 1130 return (ds->ds_dir->dd_pool->dp_spa); 1131} 1132 1133void 1134dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 1135{ 1136 dsl_pool_t *dp; 1137 1138 if (ds == NULL) /* this is the meta-objset */ 1139 return; 1140 1141 ASSERT(ds->ds_objset != NULL); 1142 1143 if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) 1144 panic("dirtying snapshot!"); 1145 1146 /* Must not dirty a dataset in the same txg where it got snapshotted. */ 1147 ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); 1148 1149 dp = ds->ds_dir->dd_pool; 1150 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { 1151 /* up the hold count until we can be written out */ 1152 dmu_buf_add_ref(ds->ds_dbuf, ds); 1153 } 1154} 1155 1156boolean_t 1157dsl_dataset_is_dirty(dsl_dataset_t *ds) 1158{ 1159 for (int t = 0; t < TXG_SIZE; t++) { 1160 if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 1161 ds, t)) 1162 return (B_TRUE); 1163 } 1164 return (B_FALSE); 1165} 1166 1167static int 1168dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1169{ 1170 uint64_t asize; 1171 1172 if (!dmu_tx_is_syncing(tx)) 1173 return (0); 1174 1175 /* 1176 * If there's an fs-only reservation, any blocks that might become 1177 * owned by the snapshot dataset must be accommodated by space 1178 * outside of the reservation. 1179 */ 1180 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 1181 asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved); 1182 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 1183 return (SET_ERROR(ENOSPC)); 1184 1185 /* 1186 * Propagate any reserved space for this snapshot to other 1187 * snapshot checks in this sync group. 1188 */ 1189 if (asize > 0) 1190 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1191 1192 return (0); 1193} 1194 1195int 1196dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, 1197 dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr) 1198{ 1199 int error; 1200 uint64_t value; 1201 1202 ds->ds_trysnap_txg = tx->tx_txg; 1203 1204 if (!dmu_tx_is_syncing(tx)) 1205 return (0); 1206 1207 /* 1208 * We don't allow multiple snapshots of the same txg. If there 1209 * is already one, try again. 1210 */ 1211 if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) 1212 return (SET_ERROR(EAGAIN)); 1213 1214 /* 1215 * Check for conflicting snapshot name. 1216 */ 1217 error = dsl_dataset_snap_lookup(ds, snapname, &value); 1218 if (error == 0) 1219 return (SET_ERROR(EEXIST)); 1220 if (error != ENOENT) 1221 return (error); 1222 1223 /* 1224 * We don't allow taking snapshots of inconsistent datasets, such as 1225 * those into which we are currently receiving. However, if we are 1226 * creating this snapshot as part of a receive, this check will be 1227 * executed atomically with respect to the completion of the receive 1228 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this 1229 * case we ignore this, knowing it will be fixed up for us shortly in 1230 * dmu_recv_end_sync(). 1231 */ 1232 if (!recv && DS_IS_INCONSISTENT(ds)) 1233 return (SET_ERROR(EBUSY)); 1234 1235 /* 1236 * Skip the check for temporary snapshots or if we have already checked 1237 * the counts in dsl_dataset_snapshot_check. This means we really only 1238 * check the count here when we're receiving a stream. 1239 */ 1240 if (cnt != 0 && cr != NULL) { 1241 error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1242 ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr); 1243 if (error != 0) 1244 return (error); 1245 } 1246 1247 error = dsl_dataset_snapshot_reserve_space(ds, tx); 1248 if (error != 0) 1249 return (error); 1250 1251 return (0); 1252} 1253 1254int 1255dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) 1256{ 1257 dsl_dataset_snapshot_arg_t *ddsa = arg; 1258 dsl_pool_t *dp = dmu_tx_pool(tx); 1259 nvpair_t *pair; 1260 int rv = 0; 1261 1262 /* 1263 * Pre-compute how many total new snapshots will be created for each 1264 * level in the tree and below. This is needed for validating the 1265 * snapshot limit when either taking a recursive snapshot or when 1266 * taking multiple snapshots. 1267 * 1268 * The problem is that the counts are not actually adjusted when 1269 * we are checking, only when we finally sync. For a single snapshot, 1270 * this is easy, the count will increase by 1 at each node up the tree, 1271 * but its more complicated for the recursive/multiple snapshot case. 1272 * 1273 * The dsl_fs_ss_limit_check function does recursively check the count 1274 * at each level up the tree but since it is validating each snapshot 1275 * independently we need to be sure that we are validating the complete 1276 * count for the entire set of snapshots. We do this by rolling up the 1277 * counts for each component of the name into an nvlist and then 1278 * checking each of those cases with the aggregated count. 1279 * 1280 * This approach properly handles not only the recursive snapshot 1281 * case (where we get all of those on the ddsa_snaps list) but also 1282 * the sibling case (e.g. snapshot a/b and a/c so that we will also 1283 * validate the limit on 'a' using a count of 2). 1284 * 1285 * We validate the snapshot names in the third loop and only report 1286 * name errors once. 1287 */ 1288 if (dmu_tx_is_syncing(tx)) { 1289 nvlist_t *cnt_track = NULL; 1290 cnt_track = fnvlist_alloc(); 1291 1292 /* Rollup aggregated counts into the cnt_track list */ 1293 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1294 pair != NULL; 1295 pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1296 char *pdelim; 1297 uint64_t val; 1298 char nm[MAXPATHLEN]; 1299 1300 (void) strlcpy(nm, nvpair_name(pair), sizeof (nm)); 1301 pdelim = strchr(nm, '@'); 1302 if (pdelim == NULL) 1303 continue; 1304 *pdelim = '\0'; 1305 1306 do { 1307 if (nvlist_lookup_uint64(cnt_track, nm, 1308 &val) == 0) { 1309 /* update existing entry */ 1310 fnvlist_add_uint64(cnt_track, nm, 1311 val + 1); 1312 } else { 1313 /* add to list */ 1314 fnvlist_add_uint64(cnt_track, nm, 1); 1315 } 1316 1317 pdelim = strrchr(nm, '/'); 1318 if (pdelim != NULL) 1319 *pdelim = '\0'; 1320 } while (pdelim != NULL); 1321 } 1322 1323 /* Check aggregated counts at each level */ 1324 for (pair = nvlist_next_nvpair(cnt_track, NULL); 1325 pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) { 1326 int error = 0; 1327 char *name; 1328 uint64_t cnt = 0; 1329 dsl_dataset_t *ds; 1330 1331 name = nvpair_name(pair); 1332 cnt = fnvpair_value_uint64(pair); 1333 ASSERT(cnt > 0); 1334 1335 error = dsl_dataset_hold(dp, name, FTAG, &ds); 1336 if (error == 0) { 1337 error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1338 ZFS_PROP_SNAPSHOT_LIMIT, NULL, 1339 ddsa->ddsa_cr); 1340 dsl_dataset_rele(ds, FTAG); 1341 } 1342 1343 if (error != 0) { 1344 if (ddsa->ddsa_errors != NULL) 1345 fnvlist_add_int32(ddsa->ddsa_errors, 1346 name, error); 1347 rv = error; 1348 /* only report one error for this check */ 1349 break; 1350 } 1351 } 1352 nvlist_free(cnt_track); 1353 } 1354 1355 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1356 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1357 int error = 0; 1358 dsl_dataset_t *ds; 1359 char *name, *atp; 1360 char dsname[ZFS_MAX_DATASET_NAME_LEN]; 1361 1362 name = nvpair_name(pair); 1363 if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN) 1364 error = SET_ERROR(ENAMETOOLONG); 1365 if (error == 0) { 1366 atp = strchr(name, '@'); 1367 if (atp == NULL) 1368 error = SET_ERROR(EINVAL); 1369 if (error == 0) 1370 (void) strlcpy(dsname, name, atp - name + 1); 1371 } 1372 if (error == 0) 1373 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 1374 if (error == 0) { 1375 /* passing 0/NULL skips dsl_fs_ss_limit_check */ 1376 error = dsl_dataset_snapshot_check_impl(ds, 1377 atp + 1, tx, B_FALSE, 0, NULL); 1378 dsl_dataset_rele(ds, FTAG); 1379 } 1380 1381 if (error != 0) { 1382 if (ddsa->ddsa_errors != NULL) { 1383 fnvlist_add_int32(ddsa->ddsa_errors, 1384 name, error); 1385 } 1386 rv = error; 1387 } 1388 } 1389 1390 return (rv); 1391} 1392 1393void 1394dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, 1395 dmu_tx_t *tx) 1396{ 1397 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1398 dmu_buf_t *dbuf; 1399 dsl_dataset_phys_t *dsphys; 1400 uint64_t dsobj, crtxg; 1401 objset_t *mos = dp->dp_meta_objset; 1402 objset_t *os; 1403 1404 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1405 1406 /* 1407 * If we are on an old pool, the zil must not be active, in which 1408 * case it will be zeroed. Usually zil_suspend() accomplishes this. 1409 */ 1410 ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || 1411 dmu_objset_from_ds(ds, &os) != 0 || 1412 bcmp(&os->os_phys->os_zil_header, &zero_zil, 1413 sizeof (zero_zil)) == 0); 1414 1415 /* Should not snapshot a dirty dataset. */ 1416 ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 1417 ds, tx->tx_txg)); 1418 1419 dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); 1420 1421 /* 1422 * The origin's ds_creation_txg has to be < TXG_INITIAL 1423 */ 1424 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1425 crtxg = 1; 1426 else 1427 crtxg = tx->tx_txg; 1428 1429 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1430 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1431 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1432 dmu_buf_will_dirty(dbuf, tx); 1433 dsphys = dbuf->db_data; 1434 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1435 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1436 dsphys->ds_fsid_guid = unique_create(); 1437 do { 1438 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1439 sizeof (dsphys->ds_guid)); 1440 } while (dsphys->ds_guid == 0); 1441 dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 1442 dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; 1443 dsphys->ds_next_snap_obj = ds->ds_object; 1444 dsphys->ds_num_children = 1; 1445 dsphys->ds_creation_time = gethrestime_sec(); 1446 dsphys->ds_creation_txg = crtxg; 1447 dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; 1448 dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes; 1449 dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes; 1450 dsphys->ds_uncompressed_bytes = 1451 dsl_dataset_phys(ds)->ds_uncompressed_bytes; 1452 dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; 1453 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 1454 dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; 1455 rrw_exit(&ds->ds_bp_rwlock, FTAG); 1456 dmu_buf_rele(dbuf, FTAG); 1457 1458 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 1459 if (ds->ds_feature_inuse[f]) 1460 dsl_dataset_activate_feature(dsobj, f, tx); 1461 } 1462 1463 ASSERT3U(ds->ds_prev != 0, ==, 1464 dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); 1465 if (ds->ds_prev) { 1466 uint64_t next_clones_obj = 1467 dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj; 1468 ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1469 ds->ds_object || 1470 dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1); 1471 if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1472 ds->ds_object) { 1473 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1474 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, 1475 dsl_dataset_phys(ds->ds_prev)->ds_creation_txg); 1476 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj; 1477 } else if (next_clones_obj != 0) { 1478 dsl_dataset_remove_from_next_clones(ds->ds_prev, 1479 dsphys->ds_next_snap_obj, tx); 1480 VERIFY0(zap_add_int(mos, 1481 next_clones_obj, dsobj, tx)); 1482 } 1483 } 1484 1485 /* 1486 * If we have a reference-reservation on this dataset, we will 1487 * need to increase the amount of refreservation being charged 1488 * since our unique space is going to zero. 1489 */ 1490 if (ds->ds_reserved) { 1491 int64_t delta; 1492 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 1493 delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, 1494 ds->ds_reserved); 1495 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1496 delta, 0, 0, tx); 1497 } 1498 1499 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1500 dsl_dataset_phys(ds)->ds_deadlist_obj = 1501 dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, 1502 dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); 1503 dsl_deadlist_close(&ds->ds_deadlist); 1504 dsl_deadlist_open(&ds->ds_deadlist, mos, 1505 dsl_dataset_phys(ds)->ds_deadlist_obj); 1506 dsl_deadlist_add_key(&ds->ds_deadlist, 1507 dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); 1508 1509 if (dsl_dataset_remap_deadlist_exists(ds)) { 1510 uint64_t remap_deadlist_obj = 1511 dsl_dataset_get_remap_deadlist_object(ds); 1512 /* 1513 * Move the remap_deadlist to the snapshot. The head 1514 * will create a new remap deadlist on demand, from 1515 * dsl_dataset_block_remapped(). 1516 */ 1517 dsl_dataset_unset_remap_deadlist_object(ds, tx); 1518 dsl_deadlist_close(&ds->ds_remap_deadlist); 1519 1520 dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); 1521 VERIFY0(zap_add(mos, dsobj, DS_FIELD_REMAP_DEADLIST, 1522 sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj, tx)); 1523 } 1524 1525 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg); 1526 dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj; 1527 dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg; 1528 dsl_dataset_phys(ds)->ds_unique_bytes = 0; 1529 1530 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1531 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1532 1533 VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj, 1534 snapname, 8, 1, &dsobj, tx)); 1535 1536 if (ds->ds_prev) 1537 dsl_dataset_rele(ds->ds_prev, ds); 1538 VERIFY0(dsl_dataset_hold_obj(dp, 1539 dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); 1540 1541 dsl_scan_ds_snapshotted(ds, tx); 1542 1543 dsl_dir_snap_cmtime_update(ds->ds_dir); 1544 1545 spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); 1546} 1547 1548void 1549dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) 1550{ 1551 dsl_dataset_snapshot_arg_t *ddsa = arg; 1552 dsl_pool_t *dp = dmu_tx_pool(tx); 1553 nvpair_t *pair; 1554 1555 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1556 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1557 dsl_dataset_t *ds; 1558 char *name, *atp; 1559 char dsname[ZFS_MAX_DATASET_NAME_LEN]; 1560 1561 name = nvpair_name(pair); 1562 atp = strchr(name, '@'); 1563 (void) strlcpy(dsname, name, atp - name + 1); 1564 VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); 1565 1566 dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); 1567 if (ddsa->ddsa_props != NULL) { 1568 dsl_props_set_sync_impl(ds->ds_prev, 1569 ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); 1570 } 1571 dsl_dataset_rele(ds, FTAG); 1572 } 1573} 1574 1575/* 1576 * The snapshots must all be in the same pool. 1577 * All-or-nothing: if there are any failures, nothing will be modified. 1578 */ 1579int 1580dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) 1581{ 1582 dsl_dataset_snapshot_arg_t ddsa; 1583 nvpair_t *pair; 1584 boolean_t needsuspend; 1585 int error; 1586 spa_t *spa; 1587 char *firstname; 1588 nvlist_t *suspended = NULL; 1589 1590 pair = nvlist_next_nvpair(snaps, NULL); 1591 if (pair == NULL) 1592 return (0); 1593 firstname = nvpair_name(pair); 1594 1595 error = spa_open(firstname, &spa, FTAG); 1596 if (error != 0) 1597 return (error); 1598 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1599 spa_close(spa, FTAG); 1600 1601 if (needsuspend) { 1602 suspended = fnvlist_alloc(); 1603 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1604 pair = nvlist_next_nvpair(snaps, pair)) { 1605 char fsname[ZFS_MAX_DATASET_NAME_LEN]; 1606 char *snapname = nvpair_name(pair); 1607 char *atp; 1608 void *cookie; 1609 1610 atp = strchr(snapname, '@'); 1611 if (atp == NULL) { 1612 error = SET_ERROR(EINVAL); 1613 break; 1614 } 1615 (void) strlcpy(fsname, snapname, atp - snapname + 1); 1616 1617 error = zil_suspend(fsname, &cookie); 1618 if (error != 0) 1619 break; 1620 fnvlist_add_uint64(suspended, fsname, 1621 (uintptr_t)cookie); 1622 } 1623 } 1624 1625 ddsa.ddsa_snaps = snaps; 1626 ddsa.ddsa_props = props; 1627 ddsa.ddsa_errors = errors; 1628 ddsa.ddsa_cr = CRED(); 1629 1630 if (error == 0) { 1631 error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, 1632 dsl_dataset_snapshot_sync, &ddsa, 1633 fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL); 1634 } 1635 1636 if (suspended != NULL) { 1637 for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; 1638 pair = nvlist_next_nvpair(suspended, pair)) { 1639 zil_resume((void *)(uintptr_t) 1640 fnvpair_value_uint64(pair)); 1641 } 1642 fnvlist_free(suspended); 1643 } 1644 1645#ifdef __FreeBSD__ 1646#ifdef _KERNEL 1647 if (error == 0) { 1648 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1649 pair = nvlist_next_nvpair(snaps, pair)) { 1650 char *snapname = nvpair_name(pair); 1651 zvol_create_minors(snapname); 1652 } 1653 } 1654#endif 1655#endif 1656 return (error); 1657} 1658 1659typedef struct dsl_dataset_snapshot_tmp_arg { 1660 const char *ddsta_fsname; 1661 const char *ddsta_snapname; 1662 minor_t ddsta_cleanup_minor; 1663 const char *ddsta_htag; 1664} dsl_dataset_snapshot_tmp_arg_t; 1665 1666static int 1667dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) 1668{ 1669 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1670 dsl_pool_t *dp = dmu_tx_pool(tx); 1671 dsl_dataset_t *ds; 1672 int error; 1673 1674 error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); 1675 if (error != 0) 1676 return (error); 1677 1678 /* NULL cred means no limit check for tmp snapshot */ 1679 error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, 1680 tx, B_FALSE, 0, NULL); 1681 if (error != 0) { 1682 dsl_dataset_rele(ds, FTAG); 1683 return (error); 1684 } 1685 1686 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1687 dsl_dataset_rele(ds, FTAG); 1688 return (SET_ERROR(ENOTSUP)); 1689 } 1690 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1691 B_TRUE, tx); 1692 if (error != 0) { 1693 dsl_dataset_rele(ds, FTAG); 1694 return (error); 1695 } 1696 1697 dsl_dataset_rele(ds, FTAG); 1698 return (0); 1699} 1700 1701static void 1702dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1703{ 1704 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1705 dsl_pool_t *dp = dmu_tx_pool(tx); 1706 dsl_dataset_t *ds; 1707 1708 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1709 1710 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1711 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1712 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1713 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1714 1715 dsl_dataset_rele(ds, FTAG); 1716} 1717 1718int 1719dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1720 minor_t cleanup_minor, const char *htag) 1721{ 1722 dsl_dataset_snapshot_tmp_arg_t ddsta; 1723 int error; 1724 spa_t *spa; 1725 boolean_t needsuspend; 1726 void *cookie; 1727 1728 ddsta.ddsta_fsname = fsname; 1729 ddsta.ddsta_snapname = snapname; 1730 ddsta.ddsta_cleanup_minor = cleanup_minor; 1731 ddsta.ddsta_htag = htag; 1732 1733 error = spa_open(fsname, &spa, FTAG); 1734 if (error != 0) 1735 return (error); 1736 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1737 spa_close(spa, FTAG); 1738 1739 if (needsuspend) { 1740 error = zil_suspend(fsname, &cookie); 1741 if (error != 0) 1742 return (error); 1743 } 1744 1745 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1746 dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED); 1747 1748 if (needsuspend) 1749 zil_resume(cookie); 1750 return (error); 1751} 1752 1753void 1754dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1755{ 1756 ASSERT(dmu_tx_is_syncing(tx)); 1757 ASSERT(ds->ds_objset != NULL); 1758 ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0); 1759 1760 /* 1761 * in case we had to change ds_fsid_guid when we opened it, 1762 * sync it out now. 1763 */ 1764 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1765 dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; 1766 1767 if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) { 1768 VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, 1769 ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1, 1770 &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx)); 1771 VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, 1772 ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1, 1773 &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx)); 1774 VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, 1775 ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1, 1776 &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx)); 1777 ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0; 1778 ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0; 1779 ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0; 1780 } 1781 1782 dmu_objset_sync(ds->ds_objset, zio, tx); 1783 1784 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 1785 if (ds->ds_feature_activation_needed[f]) { 1786 if (ds->ds_feature_inuse[f]) 1787 continue; 1788 dsl_dataset_activate_feature(ds->ds_object, f, tx); 1789 ds->ds_feature_inuse[f] = B_TRUE; 1790 } 1791 } 1792} 1793 1794static int 1795deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 1796{ 1797 dsl_deadlist_t *dl = arg; 1798 dsl_deadlist_insert(dl, bp, tx); 1799 return (0); 1800} 1801 1802void 1803dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) 1804{ 1805 objset_t *os = ds->ds_objset; 1806 1807 bplist_iterate(&ds->ds_pending_deadlist, 1808 deadlist_enqueue_cb, &ds->ds_deadlist, tx); 1809 1810 if (os->os_synced_dnodes != NULL) { 1811 multilist_destroy(os->os_synced_dnodes); 1812 os->os_synced_dnodes = NULL; 1813 } 1814 1815 ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); 1816 1817 dmu_buf_rele(ds->ds_dbuf, ds); 1818} 1819 1820int 1821get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val) 1822{ 1823 uint64_t count = 0; 1824 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1825 zap_cursor_t zc; 1826 zap_attribute_t za; 1827 1828 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1829 1830 /* 1831 * There may be missing entries in ds_next_clones_obj 1832 * due to a bug in a previous version of the code. 1833 * Only trust it if it has the right number of entries. 1834 */ 1835 if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { 1836 VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1837 &count)); 1838 } 1839 if (count != dsl_dataset_phys(ds)->ds_num_children - 1) { 1840 return (ENOENT); 1841 } 1842 for (zap_cursor_init(&zc, mos, 1843 dsl_dataset_phys(ds)->ds_next_clones_obj); 1844 zap_cursor_retrieve(&zc, &za) == 0; 1845 zap_cursor_advance(&zc)) { 1846 dsl_dataset_t *clone; 1847 char buf[ZFS_MAX_DATASET_NAME_LEN]; 1848 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1849 za.za_first_integer, FTAG, &clone)); 1850 dsl_dir_name(clone->ds_dir, buf); 1851 fnvlist_add_boolean(val, buf); 1852 dsl_dataset_rele(clone, FTAG); 1853 } 1854 zap_cursor_fini(&zc); 1855 return (0); 1856} 1857 1858void 1859get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1860{ 1861 nvlist_t *propval = fnvlist_alloc(); 1862 nvlist_t *val; 1863 1864 /* 1865 * We use nvlist_alloc() instead of fnvlist_alloc() because the 1866 * latter would allocate the list with NV_UNIQUE_NAME flag. 1867 * As a result, every time a clone name is appended to the list 1868 * it would be (linearly) searched for for a duplicate name. 1869 * We already know that all clone names must be unique and we 1870 * want avoid the quadratic complexity of double-checking that 1871 * because we can have a large number of clones. 1872 */ 1873 VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP)); 1874 1875 if (get_clones_stat_impl(ds, val) == 0) { 1876 fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1877 fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), 1878 propval); 1879 } 1880 1881 nvlist_free(val); 1882 nvlist_free(propval); 1883} 1884 1885/* 1886 * Returns a string that represents the receive resume stats token. It should 1887 * be freed with strfree(). 1888 */ 1889char * 1890get_receive_resume_stats_impl(dsl_dataset_t *ds) 1891{ 1892 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1893 1894 if (dsl_dataset_has_resume_receive_state(ds)) { 1895 char *str; 1896 void *packed; 1897 uint8_t *compressed; 1898 uint64_t val; 1899 nvlist_t *token_nv = fnvlist_alloc(); 1900 size_t packed_size, compressed_size; 1901 1902 if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1903 DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) { 1904 fnvlist_add_uint64(token_nv, "fromguid", val); 1905 } 1906 if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1907 DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) { 1908 fnvlist_add_uint64(token_nv, "object", val); 1909 } 1910 if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1911 DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) { 1912 fnvlist_add_uint64(token_nv, "offset", val); 1913 } 1914 if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1915 DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) { 1916 fnvlist_add_uint64(token_nv, "bytes", val); 1917 } 1918 if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1919 DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) { 1920 fnvlist_add_uint64(token_nv, "toguid", val); 1921 } 1922 char buf[256]; 1923 if (zap_lookup(dp->dp_meta_objset, ds->ds_object, 1924 DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) { 1925 fnvlist_add_string(token_nv, "toname", buf); 1926 } 1927 if (zap_contains(dp->dp_meta_objset, ds->ds_object, 1928 DS_FIELD_RESUME_LARGEBLOCK) == 0) { 1929 fnvlist_add_boolean(token_nv, "largeblockok"); 1930 } 1931 if (zap_contains(dp->dp_meta_objset, ds->ds_object, 1932 DS_FIELD_RESUME_EMBEDOK) == 0) { 1933 fnvlist_add_boolean(token_nv, "embedok"); 1934 } 1935 if (zap_contains(dp->dp_meta_objset, ds->ds_object, 1936 DS_FIELD_RESUME_COMPRESSOK) == 0) { 1937 fnvlist_add_boolean(token_nv, "compressok"); 1938 } 1939 packed = fnvlist_pack(token_nv, &packed_size); 1940 fnvlist_free(token_nv); 1941 compressed = kmem_alloc(packed_size, KM_SLEEP); 1942 1943 compressed_size = gzip_compress(packed, compressed, 1944 packed_size, packed_size, 6); 1945 1946 zio_cksum_t cksum; 1947 fletcher_4_native(compressed, compressed_size, NULL, &cksum); 1948 1949 str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP); 1950 for (int i = 0; i < compressed_size; i++) { 1951 (void) sprintf(str + i * 2, "%02x", compressed[i]); 1952 } 1953 str[compressed_size * 2] = '\0'; 1954 char *propval = kmem_asprintf("%u-%llx-%llx-%s", 1955 ZFS_SEND_RESUME_TOKEN_VERSION, 1956 (longlong_t)cksum.zc_word[0], 1957 (longlong_t)packed_size, str); 1958 kmem_free(packed, packed_size); 1959 kmem_free(str, compressed_size * 2 + 1); 1960 kmem_free(compressed, packed_size); 1961 return (propval); 1962 } 1963 return (spa_strdup("")); 1964} 1965 1966/* 1967 * Returns a string that represents the receive resume stats token of the 1968 * dataset's child. It should be freed with strfree(). 1969 */ 1970char * 1971get_child_receive_stats(dsl_dataset_t *ds) 1972{ 1973 char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; 1974 dsl_dataset_t *recv_ds; 1975 dsl_dataset_name(ds, recvname); 1976 if (strlcat(recvname, "/", sizeof (recvname)) < 1977 sizeof (recvname) && 1978 strlcat(recvname, recv_clone_name, sizeof (recvname)) < 1979 sizeof (recvname) && 1980 dsl_dataset_hold(ds->ds_dir->dd_pool, recvname, FTAG, 1981 &recv_ds) == 0) { 1982 char *propval = get_receive_resume_stats_impl(recv_ds); 1983 dsl_dataset_rele(recv_ds, FTAG); 1984 return (propval); 1985 } 1986 return (spa_strdup("")); 1987} 1988 1989static void 1990get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) 1991{ 1992 char *propval = get_receive_resume_stats_impl(ds); 1993 if (strcmp(propval, "") != 0) { 1994 dsl_prop_nvlist_add_string(nv, 1995 ZFS_PROP_RECEIVE_RESUME_TOKEN, propval); 1996 } else { 1997 char *childval = get_child_receive_stats(ds); 1998 if (strcmp(childval, "") != 0) { 1999 dsl_prop_nvlist_add_string(nv, 2000 ZFS_PROP_RECEIVE_RESUME_TOKEN, childval); 2001 } 2002 strfree(childval); 2003 } 2004 strfree(propval); 2005} 2006 2007uint64_t 2008dsl_get_refratio(dsl_dataset_t *ds) 2009{ 2010 uint64_t ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 : 2011 (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / 2012 dsl_dataset_phys(ds)->ds_compressed_bytes); 2013 return (ratio); 2014} 2015 2016uint64_t 2017dsl_get_logicalreferenced(dsl_dataset_t *ds) 2018{ 2019 return (dsl_dataset_phys(ds)->ds_uncompressed_bytes); 2020} 2021 2022uint64_t 2023dsl_get_compressratio(dsl_dataset_t *ds) 2024{ 2025 if (ds->ds_is_snapshot) { 2026 return (dsl_get_refratio(ds)); 2027 } else { 2028 dsl_dir_t *dd = ds->ds_dir; 2029 mutex_enter(&dd->dd_lock); 2030 uint64_t val = dsl_dir_get_compressratio(dd); 2031 mutex_exit(&dd->dd_lock); 2032 return (val); 2033 } 2034} 2035 2036uint64_t 2037dsl_get_used(dsl_dataset_t *ds) 2038{ 2039 if (ds->ds_is_snapshot) { 2040 return (dsl_dataset_phys(ds)->ds_unique_bytes); 2041 } else { 2042 dsl_dir_t *dd = ds->ds_dir; 2043 mutex_enter(&dd->dd_lock); 2044 uint64_t val = dsl_dir_get_used(dd); 2045 mutex_exit(&dd->dd_lock); 2046 return (val); 2047 } 2048} 2049 2050uint64_t 2051dsl_get_creation(dsl_dataset_t *ds) 2052{ 2053 return (dsl_dataset_phys(ds)->ds_creation_time); 2054} 2055 2056uint64_t 2057dsl_get_creationtxg(dsl_dataset_t *ds) 2058{ 2059 return (dsl_dataset_phys(ds)->ds_creation_txg); 2060} 2061 2062uint64_t 2063dsl_get_refquota(dsl_dataset_t *ds) 2064{ 2065 return (ds->ds_quota); 2066} 2067 2068uint64_t 2069dsl_get_refreservation(dsl_dataset_t *ds) 2070{ 2071 return (ds->ds_reserved); 2072} 2073 2074uint64_t 2075dsl_get_guid(dsl_dataset_t *ds) 2076{ 2077 return (dsl_dataset_phys(ds)->ds_guid); 2078} 2079 2080uint64_t 2081dsl_get_unique(dsl_dataset_t *ds) 2082{ 2083 return (dsl_dataset_phys(ds)->ds_unique_bytes); 2084} 2085 2086uint64_t 2087dsl_get_objsetid(dsl_dataset_t *ds) 2088{ 2089 return (ds->ds_object); 2090} 2091 2092uint64_t 2093dsl_get_userrefs(dsl_dataset_t *ds) 2094{ 2095 return (ds->ds_userrefs); 2096} 2097 2098uint64_t 2099dsl_get_defer_destroy(dsl_dataset_t *ds) 2100{ 2101 return (DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 2102} 2103 2104uint64_t 2105dsl_get_referenced(dsl_dataset_t *ds) 2106{ 2107 return (dsl_dataset_phys(ds)->ds_referenced_bytes); 2108} 2109 2110uint64_t 2111dsl_get_numclones(dsl_dataset_t *ds) 2112{ 2113 ASSERT(ds->ds_is_snapshot); 2114 return (dsl_dataset_phys(ds)->ds_num_children - 1); 2115} 2116 2117uint64_t 2118dsl_get_inconsistent(dsl_dataset_t *ds) 2119{ 2120 return ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT) ? 2121 1 : 0); 2122} 2123 2124uint64_t 2125dsl_get_available(dsl_dataset_t *ds) 2126{ 2127 uint64_t refdbytes = dsl_get_referenced(ds); 2128 uint64_t availbytes = dsl_dir_space_available(ds->ds_dir, 2129 NULL, 0, TRUE); 2130 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { 2131 availbytes += 2132 ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; 2133 } 2134 if (ds->ds_quota != 0) { 2135 /* 2136 * Adjust available bytes according to refquota 2137 */ 2138 if (refdbytes < ds->ds_quota) { 2139 availbytes = MIN(availbytes, 2140 ds->ds_quota - refdbytes); 2141 } else { 2142 availbytes = 0; 2143 } 2144 } 2145 return (availbytes); 2146} 2147 2148int 2149dsl_get_written(dsl_dataset_t *ds, uint64_t *written) 2150{ 2151 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2152 dsl_dataset_t *prev; 2153 int err = dsl_dataset_hold_obj(dp, 2154 dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); 2155 if (err == 0) { 2156 uint64_t comp, uncomp; 2157 err = dsl_dataset_space_written(prev, ds, written, 2158 &comp, &uncomp); 2159 dsl_dataset_rele(prev, FTAG); 2160 } 2161 return (err); 2162} 2163 2164/* 2165 * 'snap' should be a buffer of size ZFS_MAX_DATASET_NAME_LEN. 2166 */ 2167int 2168dsl_get_prev_snap(dsl_dataset_t *ds, char *snap) 2169{ 2170 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2171 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { 2172 dsl_dataset_name(ds->ds_prev, snap); 2173 return (0); 2174 } else { 2175 return (ENOENT); 2176 } 2177} 2178 2179/* 2180 * Returns the mountpoint property and source for the given dataset in the value 2181 * and source buffers. The value buffer must be at least as large as MAXPATHLEN 2182 * and the source buffer as least as large a ZFS_MAX_DATASET_NAME_LEN. 2183 * Returns 0 on success and an error on failure. 2184 */ 2185int 2186dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, 2187 char *source) 2188{ 2189 int error; 2190 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2191 2192 /* Retrieve the mountpoint value stored in the zap opbject */ 2193 error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), 1, 2194 ZAP_MAXVALUELEN, value, source); 2195 if (error != 0) { 2196 return (error); 2197 } 2198 2199 /* Process the dsname and source to find the full mountpoint string */ 2200 if (value[0] == '/') { 2201 char *buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); 2202 char *root = buf; 2203 const char *relpath; 2204 2205 /* 2206 * If we inherit the mountpoint, even from a dataset 2207 * with a received value, the source will be the path of 2208 * the dataset we inherit from. If source is 2209 * ZPROP_SOURCE_VAL_RECVD, the received value is not 2210 * inherited. 2211 */ 2212 if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { 2213 relpath = ""; 2214 } else { 2215 ASSERT0(strncmp(dsname, source, strlen(source))); 2216 relpath = dsname + strlen(source); 2217 if (relpath[0] == '/') 2218 relpath++; 2219 } 2220 2221 spa_altroot(dp->dp_spa, root, ZAP_MAXVALUELEN); 2222 2223 /* 2224 * Special case an alternate root of '/'. This will 2225 * avoid having multiple leading slashes in the 2226 * mountpoint path. 2227 */ 2228 if (strcmp(root, "/") == 0) 2229 root++; 2230 2231 /* 2232 * If the mountpoint is '/' then skip over this 2233 * if we are obtaining either an alternate root or 2234 * an inherited mountpoint. 2235 */ 2236 char *mnt = value; 2237 if (value[1] == '\0' && (root[0] != '\0' || 2238 relpath[0] != '\0')) 2239 mnt = value + 1; 2240 2241 if (relpath[0] == '\0') { 2242 (void) snprintf(value, ZAP_MAXVALUELEN, "%s%s", 2243 root, mnt); 2244 } else { 2245 (void) snprintf(value, ZAP_MAXVALUELEN, "%s%s%s%s", 2246 root, mnt, relpath[0] == '@' ? "" : "/", 2247 relpath); 2248 } 2249 kmem_free(buf, ZAP_MAXVALUELEN); 2250 } else { 2251 /* 'legacy' or 'none' */ 2252 (void) snprintf(value, ZAP_MAXVALUELEN, "%s", value); 2253 } 2254 return (0); 2255} 2256 2257void 2258dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 2259{ 2260 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2261 2262 ASSERT(dsl_pool_config_held(dp)); 2263 2264 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, 2265 dsl_get_refratio(ds)); 2266 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 2267 dsl_get_logicalreferenced(ds)); 2268 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 2269 dsl_get_compressratio(ds)); 2270 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 2271 dsl_get_used(ds)); 2272 2273 if (ds->ds_is_snapshot) { 2274 get_clones_stat(ds, nv); 2275 } else { 2276 char buf[ZFS_MAX_DATASET_NAME_LEN]; 2277 if (dsl_get_prev_snap(ds, buf) == 0) 2278 dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, 2279 buf); 2280 dsl_dir_stats(ds->ds_dir, nv); 2281 } 2282 2283 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, 2284 dsl_get_available(ds)); 2285 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, 2286 dsl_get_referenced(ds)); 2287 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 2288 dsl_get_creation(ds)); 2289 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 2290 dsl_get_creationtxg(ds)); 2291 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 2292 dsl_get_refquota(ds)); 2293 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 2294 dsl_get_refreservation(ds)); 2295 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 2296 dsl_get_guid(ds)); 2297 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 2298 dsl_get_unique(ds)); 2299 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 2300 dsl_get_objsetid(ds)); 2301 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 2302 dsl_get_userrefs(ds)); 2303 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 2304 dsl_get_defer_destroy(ds)); 2305 2306 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 2307 uint64_t written; 2308 if (dsl_get_written(ds, &written) == 0) { 2309 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 2310 written); 2311 } 2312 } 2313 2314 if (!dsl_dataset_is_snapshot(ds)) { 2315 /* 2316 * A failed "newfs" (e.g. full) resumable receive leaves 2317 * the stats set on this dataset. Check here for the prop. 2318 */ 2319 get_receive_resume_stats(ds, nv); 2320 2321 /* 2322 * A failed incremental resumable receive leaves the 2323 * stats set on our child named "%recv". Check the child 2324 * for the prop. 2325 */ 2326 /* 6 extra bytes for /%recv */ 2327 char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; 2328 dsl_dataset_t *recv_ds; 2329 dsl_dataset_name(ds, recvname); 2330 if (strlcat(recvname, "/", sizeof (recvname)) < 2331 sizeof (recvname) && 2332 strlcat(recvname, recv_clone_name, sizeof (recvname)) < 2333 sizeof (recvname) && 2334 dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) { 2335 get_receive_resume_stats(recv_ds, nv); 2336 dsl_dataset_rele(recv_ds, FTAG); 2337 } 2338 } 2339} 2340 2341void 2342dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 2343{ 2344 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2345 ASSERT(dsl_pool_config_held(dp)); 2346 2347 stat->dds_creation_txg = dsl_get_creationtxg(ds); 2348 stat->dds_inconsistent = dsl_get_inconsistent(ds); 2349 stat->dds_guid = dsl_get_guid(ds); 2350 stat->dds_origin[0] = '\0'; 2351 if (ds->ds_is_snapshot) { 2352 stat->dds_is_snapshot = B_TRUE; 2353 stat->dds_num_clones = dsl_get_numclones(ds); 2354 } else { 2355 stat->dds_is_snapshot = B_FALSE; 2356 stat->dds_num_clones = 0; 2357 2358 if (dsl_dir_is_clone(ds->ds_dir)) { 2359 dsl_dir_get_origin(ds->ds_dir, stat->dds_origin); 2360 } 2361 } 2362} 2363 2364uint64_t 2365dsl_dataset_fsid_guid(dsl_dataset_t *ds) 2366{ 2367 return (ds->ds_fsid_guid); 2368} 2369 2370void 2371dsl_dataset_space(dsl_dataset_t *ds, 2372 uint64_t *refdbytesp, uint64_t *availbytesp, 2373 uint64_t *usedobjsp, uint64_t *availobjsp) 2374{ 2375 *refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes; 2376 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2377 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) 2378 *availbytesp += 2379 ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; 2380 if (ds->ds_quota != 0) { 2381 /* 2382 * Adjust available bytes according to refquota 2383 */ 2384 if (*refdbytesp < ds->ds_quota) 2385 *availbytesp = MIN(*availbytesp, 2386 ds->ds_quota - *refdbytesp); 2387 else 2388 *availbytesp = 0; 2389 } 2390 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 2391 *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); 2392 rrw_exit(&ds->ds_bp_rwlock, FTAG); 2393 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2394} 2395 2396boolean_t 2397dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) 2398{ 2399 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2400 uint64_t birth; 2401 2402 ASSERT(dsl_pool_config_held(dp)); 2403 if (snap == NULL) 2404 return (B_FALSE); 2405 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 2406 birth = dsl_dataset_get_blkptr(ds)->blk_birth; 2407 rrw_exit(&ds->ds_bp_rwlock, FTAG); 2408 if (birth > dsl_dataset_phys(snap)->ds_creation_txg) { 2409 objset_t *os, *os_snap; 2410 /* 2411 * It may be that only the ZIL differs, because it was 2412 * reset in the head. Don't count that as being 2413 * modified. 2414 */ 2415 if (dmu_objset_from_ds(ds, &os) != 0) 2416 return (B_TRUE); 2417 if (dmu_objset_from_ds(snap, &os_snap) != 0) 2418 return (B_TRUE); 2419 return (bcmp(&os->os_phys->os_meta_dnode, 2420 &os_snap->os_phys->os_meta_dnode, 2421 sizeof (os->os_phys->os_meta_dnode)) != 0); 2422 } 2423 return (B_FALSE); 2424} 2425 2426typedef struct dsl_dataset_rename_snapshot_arg { 2427 const char *ddrsa_fsname; 2428 const char *ddrsa_oldsnapname; 2429 const char *ddrsa_newsnapname; 2430 boolean_t ddrsa_recursive; 2431 dmu_tx_t *ddrsa_tx; 2432} dsl_dataset_rename_snapshot_arg_t; 2433 2434/* ARGSUSED */ 2435static int 2436dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 2437 dsl_dataset_t *hds, void *arg) 2438{ 2439 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2440 int error; 2441 uint64_t val; 2442 2443 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 2444 if (error != 0) { 2445 /* ignore nonexistent snapshots */ 2446 return (error == ENOENT ? 0 : error); 2447 } 2448 2449 /* new name should not exist */ 2450 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 2451 if (error == 0) 2452 error = SET_ERROR(EEXIST); 2453 else if (error == ENOENT) 2454 error = 0; 2455 2456 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2457 if (dsl_dir_namelen(hds->ds_dir) + 1 + 2458 strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN) 2459 error = SET_ERROR(ENAMETOOLONG); 2460 2461 return (error); 2462} 2463 2464static int 2465dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 2466{ 2467 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2468 dsl_pool_t *dp = dmu_tx_pool(tx); 2469 dsl_dataset_t *hds; 2470 int error; 2471 2472 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 2473 if (error != 0) 2474 return (error); 2475 2476 if (ddrsa->ddrsa_recursive) { 2477 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 2478 dsl_dataset_rename_snapshot_check_impl, ddrsa, 2479 DS_FIND_CHILDREN); 2480 } else { 2481 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 2482 } 2483 dsl_dataset_rele(hds, FTAG); 2484 return (error); 2485} 2486 2487static int 2488dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 2489 dsl_dataset_t *hds, void *arg) 2490{ 2491#ifdef __FreeBSD__ 2492#ifdef _KERNEL 2493 char *oldname, *newname; 2494#endif 2495#endif 2496 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2497 dsl_dataset_t *ds; 2498 uint64_t val; 2499 dmu_tx_t *tx = ddrsa->ddrsa_tx; 2500 int error; 2501 2502 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 2503 ASSERT(error == 0 || error == ENOENT); 2504 if (error == ENOENT) { 2505 /* ignore nonexistent snapshots */ 2506 return (0); 2507 } 2508 2509 VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 2510 2511 /* log before we change the name */ 2512 spa_history_log_internal_ds(ds, "rename", tx, 2513 "-> @%s", ddrsa->ddrsa_newsnapname); 2514 2515 VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx, 2516 B_FALSE)); 2517 mutex_enter(&ds->ds_lock); 2518 (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 2519 mutex_exit(&ds->ds_lock); 2520 VERIFY0(zap_add(dp->dp_meta_objset, 2521 dsl_dataset_phys(hds)->ds_snapnames_zapobj, 2522 ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 2523 2524#ifdef __FreeBSD__ 2525#ifdef _KERNEL 2526 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2527 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2528 snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 2529 ddrsa->ddrsa_oldsnapname); 2530 snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 2531 ddrsa->ddrsa_newsnapname); 2532 zfsvfs_update_fromname(oldname, newname); 2533 zvol_rename_minors(oldname, newname); 2534 kmem_free(newname, MAXPATHLEN); 2535 kmem_free(oldname, MAXPATHLEN); 2536#endif 2537#endif 2538 dsl_dataset_rele(ds, FTAG); 2539 2540 return (0); 2541} 2542 2543static void 2544dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 2545{ 2546 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 2547 dsl_pool_t *dp = dmu_tx_pool(tx); 2548 dsl_dataset_t *hds; 2549 2550 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 2551 ddrsa->ddrsa_tx = tx; 2552 if (ddrsa->ddrsa_recursive) { 2553 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 2554 dsl_dataset_rename_snapshot_sync_impl, ddrsa, 2555 DS_FIND_CHILDREN)); 2556 } else { 2557 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 2558 } 2559 dsl_dataset_rele(hds, FTAG); 2560} 2561 2562int 2563dsl_dataset_rename_snapshot(const char *fsname, 2564 const char *oldsnapname, const char *newsnapname, boolean_t recursive) 2565{ 2566 dsl_dataset_rename_snapshot_arg_t ddrsa; 2567 2568 ddrsa.ddrsa_fsname = fsname; 2569 ddrsa.ddrsa_oldsnapname = oldsnapname; 2570 ddrsa.ddrsa_newsnapname = newsnapname; 2571 ddrsa.ddrsa_recursive = recursive; 2572 2573 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 2574 dsl_dataset_rename_snapshot_sync, &ddrsa, 2575 1, ZFS_SPACE_CHECK_RESERVED)); 2576} 2577 2578/* 2579 * If we're doing an ownership handoff, we need to make sure that there is 2580 * only one long hold on the dataset. We're not allowed to change anything here 2581 * so we don't permanently release the long hold or regular hold here. We want 2582 * to do this only when syncing to avoid the dataset unexpectedly going away 2583 * when we release the long hold. 2584 */ 2585static int 2586dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 2587{ 2588 boolean_t held; 2589 2590 if (!dmu_tx_is_syncing(tx)) 2591 return (0); 2592 2593 if (owner != NULL) { 2594 VERIFY3P(ds->ds_owner, ==, owner); 2595 dsl_dataset_long_rele(ds, owner); 2596 } 2597 2598 held = dsl_dataset_long_held(ds); 2599 2600 if (owner != NULL) 2601 dsl_dataset_long_hold(ds, owner); 2602 2603 if (held) 2604 return (SET_ERROR(EBUSY)); 2605 2606 return (0); 2607} 2608 2609int 2610dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 2611{ 2612 dsl_dataset_rollback_arg_t *ddra = arg; 2613 dsl_pool_t *dp = dmu_tx_pool(tx); 2614 dsl_dataset_t *ds; 2615 int64_t unused_refres_delta; 2616 int error; 2617 2618 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 2619 if (error != 0) 2620 return (error); 2621 2622 /* must not be a snapshot */ 2623 if (ds->ds_is_snapshot) { 2624 dsl_dataset_rele(ds, FTAG); 2625 return (SET_ERROR(EINVAL)); 2626 } 2627 2628 /* must have a most recent snapshot */ 2629 if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) { 2630 dsl_dataset_rele(ds, FTAG); 2631 return (SET_ERROR(ESRCH)); 2632 } 2633 2634 /* 2635 * No rollback to a snapshot created in the current txg, because 2636 * the rollback may dirty the dataset and create blocks that are 2637 * not reachable from the rootbp while having a birth txg that 2638 * falls into the snapshot's range. 2639 */ 2640 if (dmu_tx_is_syncing(tx) && 2641 dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) { 2642 dsl_dataset_rele(ds, FTAG); 2643 return (SET_ERROR(EAGAIN)); 2644 } 2645 2646 /* 2647 * If the expected target snapshot is specified, then check that 2648 * the latest snapshot is it. 2649 */ 2650 if (ddra->ddra_tosnap != NULL) { 2651 dsl_dataset_t *snapds; 2652 2653 /* Check if the target snapshot exists at all. */ 2654 error = dsl_dataset_hold(dp, ddra->ddra_tosnap, FTAG, &snapds); 2655 if (error != 0) { 2656 /* 2657 * ESRCH is used to signal that the target snapshot does 2658 * not exist, while ENOENT is used to report that 2659 * the rolled back dataset does not exist. 2660 * ESRCH is also used to cover other cases where the 2661 * target snapshot is not related to the dataset being 2662 * rolled back such as being in a different pool. 2663 */ 2664 if (error == ENOENT || error == EXDEV) 2665 error = SET_ERROR(ESRCH); 2666 dsl_dataset_rele(ds, FTAG); 2667 return (error); 2668 } 2669 ASSERT(snapds->ds_is_snapshot); 2670 2671 /* Check if the snapshot is the latest snapshot indeed. */ 2672 if (snapds != ds->ds_prev) { 2673 /* 2674 * Distinguish between the case where the only problem 2675 * is intervening snapshots (EEXIST) vs the snapshot 2676 * not being a valid target for rollback (ESRCH). 2677 */ 2678 if (snapds->ds_dir == ds->ds_dir || 2679 (dsl_dir_is_clone(ds->ds_dir) && 2680 dsl_dir_phys(ds->ds_dir)->dd_origin_obj == 2681 snapds->ds_object)) { 2682 error = SET_ERROR(EEXIST); 2683 } else { 2684 error = SET_ERROR(ESRCH); 2685 } 2686 dsl_dataset_rele(snapds, FTAG); 2687 dsl_dataset_rele(ds, FTAG); 2688 return (error); 2689 } 2690 dsl_dataset_rele(snapds, FTAG); 2691 } 2692 2693 /* must not have any bookmarks after the most recent snapshot */ 2694 nvlist_t *proprequest = fnvlist_alloc(); 2695 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2696 nvlist_t *bookmarks = fnvlist_alloc(); 2697 error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks); 2698 fnvlist_free(proprequest); 2699 if (error != 0) { 2700 dsl_dataset_rele(ds, FTAG); 2701 return (error); 2702 } 2703 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL); 2704 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { 2705 nvlist_t *valuenv = 2706 fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), 2707 zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2708 uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); 2709 if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 2710 fnvlist_free(bookmarks); 2711 dsl_dataset_rele(ds, FTAG); 2712 return (SET_ERROR(EEXIST)); 2713 } 2714 } 2715 fnvlist_free(bookmarks); 2716 2717 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 2718 if (error != 0) { 2719 dsl_dataset_rele(ds, FTAG); 2720 return (error); 2721 } 2722 2723 /* 2724 * Check if the snap we are rolling back to uses more than 2725 * the refquota. 2726 */ 2727 if (ds->ds_quota != 0 && 2728 dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) { 2729 dsl_dataset_rele(ds, FTAG); 2730 return (SET_ERROR(EDQUOT)); 2731 } 2732 2733 /* 2734 * When we do the clone swap, we will temporarily use more space 2735 * due to the refreservation (the head will no longer have any 2736 * unique space, so the entire amount of the refreservation will need 2737 * to be free). We will immediately destroy the clone, freeing 2738 * this space, but the freeing happens over many txg's. 2739 */ 2740 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 2741 dsl_dataset_phys(ds)->ds_unique_bytes); 2742 2743 if (unused_refres_delta > 0 && 2744 unused_refres_delta > 2745 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 2746 dsl_dataset_rele(ds, FTAG); 2747 return (SET_ERROR(ENOSPC)); 2748 } 2749 2750 dsl_dataset_rele(ds, FTAG); 2751 return (0); 2752} 2753 2754void 2755dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 2756{ 2757 dsl_dataset_rollback_arg_t *ddra = arg; 2758 dsl_pool_t *dp = dmu_tx_pool(tx); 2759 dsl_dataset_t *ds, *clone; 2760 uint64_t cloneobj; 2761 char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 2762 2763 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 2764 2765 dsl_dataset_name(ds->ds_prev, namebuf); 2766 fnvlist_add_string(ddra->ddra_result, "target", namebuf); 2767 2768 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 2769 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 2770 2771 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 2772 2773 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 2774 dsl_dataset_zero_zil(ds, tx); 2775 2776 dsl_destroy_head_sync_impl(clone, tx); 2777 2778 dsl_dataset_rele(clone, FTAG); 2779 dsl_dataset_rele(ds, FTAG); 2780} 2781 2782/* 2783 * Rolls back the given filesystem or volume to the most recent snapshot. 2784 * The name of the most recent snapshot will be returned under key "target" 2785 * in the result nvlist. 2786 * 2787 * If owner != NULL: 2788 * - The existing dataset MUST be owned by the specified owner at entry 2789 * - Upon return, dataset will still be held by the same owner, whether we 2790 * succeed or not. 2791 * 2792 * This mode is required any time the existing filesystem is mounted. See 2793 * notes above zfs_suspend_fs() for further details. 2794 */ 2795int 2796dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner, 2797 nvlist_t *result) 2798{ 2799 dsl_dataset_rollback_arg_t ddra; 2800 2801 ddra.ddra_fsname = fsname; 2802 ddra.ddra_tosnap = tosnap; 2803 ddra.ddra_owner = owner; 2804 ddra.ddra_result = result; 2805 2806 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 2807 dsl_dataset_rollback_sync, &ddra, 2808 1, ZFS_SPACE_CHECK_RESERVED)); 2809} 2810 2811struct promotenode { 2812 list_node_t link; 2813 dsl_dataset_t *ds; 2814}; 2815 2816static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2817static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 2818 void *tag); 2819static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 2820 2821int 2822dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 2823{ 2824 dsl_dataset_promote_arg_t *ddpa = arg; 2825 dsl_pool_t *dp = dmu_tx_pool(tx); 2826 dsl_dataset_t *hds; 2827 struct promotenode *snap; 2828 dsl_dataset_t *origin_ds; 2829 int err; 2830 uint64_t unused; 2831 uint64_t ss_mv_cnt; 2832 size_t max_snap_len; 2833 boolean_t conflicting_snaps; 2834 2835 err = promote_hold(ddpa, dp, FTAG); 2836 if (err != 0) 2837 return (err); 2838 2839 hds = ddpa->ddpa_clone; 2840 snap = list_head(&ddpa->shared_snaps); 2841 origin_ds = snap->ds; 2842 max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1; 2843 2844 snap = list_head(&ddpa->origin_snaps); 2845 2846 if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) { 2847 promote_rele(ddpa, FTAG); 2848 return (SET_ERROR(EXDEV)); 2849 } 2850 2851 /* 2852 * Compute and check the amount of space to transfer. Since this is 2853 * so expensive, don't do the preliminary check. 2854 */ 2855 if (!dmu_tx_is_syncing(tx)) { 2856 promote_rele(ddpa, FTAG); 2857 return (0); 2858 } 2859 2860 /* compute origin's new unique space */ 2861 snap = list_tail(&ddpa->clone_snaps); 2862 ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, 2863 origin_ds->ds_object); 2864 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2865 dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX, 2866 &ddpa->unique, &unused, &unused); 2867 2868 /* 2869 * Walk the snapshots that we are moving 2870 * 2871 * Compute space to transfer. Consider the incremental changes 2872 * to used by each snapshot: 2873 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2874 * So each snapshot gave birth to: 2875 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2876 * So a sequence would look like: 2877 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2878 * Which simplifies to: 2879 * uN + kN + kN-1 + ... + k1 + k0 2880 * Note however, if we stop before we reach the ORIGIN we get: 2881 * uN + kN + kN-1 + ... + kM - uM-1 2882 */ 2883 conflicting_snaps = B_FALSE; 2884 ss_mv_cnt = 0; 2885 ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes; 2886 ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes; 2887 ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes; 2888 for (snap = list_head(&ddpa->shared_snaps); snap; 2889 snap = list_next(&ddpa->shared_snaps, snap)) { 2890 uint64_t val, dlused, dlcomp, dluncomp; 2891 dsl_dataset_t *ds = snap->ds; 2892 2893 ss_mv_cnt++; 2894 2895 /* 2896 * If there are long holds, we won't be able to evict 2897 * the objset. 2898 */ 2899 if (dsl_dataset_long_held(ds)) { 2900 err = SET_ERROR(EBUSY); 2901 goto out; 2902 } 2903 2904 /* Check that the snapshot name does not conflict */ 2905 VERIFY0(dsl_dataset_get_snapname(ds)); 2906 if (strlen(ds->ds_snapname) >= max_snap_len) { 2907 err = SET_ERROR(ENAMETOOLONG); 2908 goto out; 2909 } 2910 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2911 if (err == 0) { 2912 fnvlist_add_boolean(ddpa->err_ds, 2913 snap->ds->ds_snapname); 2914 conflicting_snaps = B_TRUE; 2915 } else if (err != ENOENT) { 2916 goto out; 2917 } 2918 2919 /* The very first snapshot does not have a deadlist */ 2920 if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0) 2921 continue; 2922 2923 dsl_deadlist_space(&ds->ds_deadlist, 2924 &dlused, &dlcomp, &dluncomp); 2925 ddpa->used += dlused; 2926 ddpa->comp += dlcomp; 2927 ddpa->uncomp += dluncomp; 2928 } 2929 2930 /* 2931 * In order to return the full list of conflicting snapshots, we check 2932 * whether there was a conflict after traversing all of them. 2933 */ 2934 if (conflicting_snaps) { 2935 err = SET_ERROR(EEXIST); 2936 goto out; 2937 } 2938 2939 /* 2940 * If we are a clone of a clone then we never reached ORIGIN, 2941 * so we need to subtract out the clone origin's used space. 2942 */ 2943 if (ddpa->origin_origin) { 2944 ddpa->used -= 2945 dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes; 2946 ddpa->comp -= 2947 dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes; 2948 ddpa->uncomp -= 2949 dsl_dataset_phys(ddpa->origin_origin)-> 2950 ds_uncompressed_bytes; 2951 } 2952 2953 /* Check that there is enough space and limit headroom here */ 2954 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2955 0, ss_mv_cnt, ddpa->used, ddpa->cr); 2956 if (err != 0) 2957 goto out; 2958 2959 /* 2960 * Compute the amounts of space that will be used by snapshots 2961 * after the promotion (for both origin and clone). For each, 2962 * it is the amount of space that will be on all of their 2963 * deadlists (that was not born before their new origin). 2964 */ 2965 if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2966 uint64_t space; 2967 2968 /* 2969 * Note, typically this will not be a clone of a clone, 2970 * so dd_origin_txg will be < TXG_INITIAL, so 2971 * these snaplist_space() -> dsl_deadlist_space_range() 2972 * calls will be fast because they do not have to 2973 * iterate over all bps. 2974 */ 2975 snap = list_head(&ddpa->origin_snaps); 2976 err = snaplist_space(&ddpa->shared_snaps, 2977 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2978 if (err != 0) 2979 goto out; 2980 2981 err = snaplist_space(&ddpa->clone_snaps, 2982 snap->ds->ds_dir->dd_origin_txg, &space); 2983 if (err != 0) 2984 goto out; 2985 ddpa->cloneusedsnap += space; 2986 } 2987 if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags & 2988 DD_FLAG_USED_BREAKDOWN) { 2989 err = snaplist_space(&ddpa->origin_snaps, 2990 dsl_dataset_phys(origin_ds)->ds_creation_txg, 2991 &ddpa->originusedsnap); 2992 if (err != 0) 2993 goto out; 2994 } 2995 2996out: 2997 promote_rele(ddpa, FTAG); 2998 return (err); 2999} 3000 3001void 3002dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 3003{ 3004 dsl_dataset_promote_arg_t *ddpa = arg; 3005 dsl_pool_t *dp = dmu_tx_pool(tx); 3006 dsl_dataset_t *hds; 3007 struct promotenode *snap; 3008 dsl_dataset_t *origin_ds; 3009 dsl_dataset_t *origin_head; 3010 dsl_dir_t *dd; 3011 dsl_dir_t *odd = NULL; 3012 uint64_t oldnext_obj; 3013 int64_t delta; 3014#if defined(__FreeBSD__) && defined(_KERNEL) 3015 char *oldname, *newname; 3016#endif 3017 3018 VERIFY0(promote_hold(ddpa, dp, FTAG)); 3019 hds = ddpa->ddpa_clone; 3020 3021 ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE); 3022 3023 snap = list_head(&ddpa->shared_snaps); 3024 origin_ds = snap->ds; 3025 dd = hds->ds_dir; 3026 3027 snap = list_head(&ddpa->origin_snaps); 3028 origin_head = snap->ds; 3029 3030 /* 3031 * We need to explicitly open odd, since origin_ds's dd will be 3032 * changing. 3033 */ 3034 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 3035 NULL, FTAG, &odd)); 3036 3037 /* change origin's next snap */ 3038 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 3039 oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; 3040 snap = list_tail(&ddpa->clone_snaps); 3041 ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, 3042 origin_ds->ds_object); 3043 dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object; 3044 3045 /* change the origin's next clone */ 3046 if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) { 3047 dsl_dataset_remove_from_next_clones(origin_ds, 3048 snap->ds->ds_object, tx); 3049 VERIFY0(zap_add_int(dp->dp_meta_objset, 3050 dsl_dataset_phys(origin_ds)->ds_next_clones_obj, 3051 oldnext_obj, tx)); 3052 } 3053 3054 /* change origin */ 3055 dmu_buf_will_dirty(dd->dd_dbuf, tx); 3056 ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object); 3057 dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj; 3058 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 3059 dmu_buf_will_dirty(odd->dd_dbuf, tx); 3060 dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object; 3061 origin_head->ds_dir->dd_origin_txg = 3062 dsl_dataset_phys(origin_ds)->ds_creation_txg; 3063 3064 /* change dd_clone entries */ 3065 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 3066 VERIFY0(zap_remove_int(dp->dp_meta_objset, 3067 dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx)); 3068 VERIFY0(zap_add_int(dp->dp_meta_objset, 3069 dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 3070 hds->ds_object, tx)); 3071 3072 VERIFY0(zap_remove_int(dp->dp_meta_objset, 3073 dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 3074 origin_head->ds_object, tx)); 3075 if (dsl_dir_phys(dd)->dd_clones == 0) { 3076 dsl_dir_phys(dd)->dd_clones = 3077 zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, 3078 DMU_OT_NONE, 0, tx); 3079 } 3080 VERIFY0(zap_add_int(dp->dp_meta_objset, 3081 dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx)); 3082 } 3083 3084#if defined(__FreeBSD__) && defined(_KERNEL) 3085 /* Take the spa_namespace_lock early so zvol renames don't deadlock. */ 3086 mutex_enter(&spa_namespace_lock); 3087 3088 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 3089 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 3090#endif 3091 3092 /* move snapshots to this dir */ 3093 for (snap = list_head(&ddpa->shared_snaps); snap; 3094 snap = list_next(&ddpa->shared_snaps, snap)) { 3095 dsl_dataset_t *ds = snap->ds; 3096 3097 /* 3098 * Property callbacks are registered to a particular 3099 * dsl_dir. Since ours is changing, evict the objset 3100 * so that they will be unregistered from the old dsl_dir. 3101 */ 3102 if (ds->ds_objset) { 3103 dmu_objset_evict(ds->ds_objset); 3104 ds->ds_objset = NULL; 3105 } 3106 3107 /* move snap name entry */ 3108 VERIFY0(dsl_dataset_get_snapname(ds)); 3109 VERIFY0(dsl_dataset_snap_remove(origin_head, 3110 ds->ds_snapname, tx, B_TRUE)); 3111 VERIFY0(zap_add(dp->dp_meta_objset, 3112 dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 3113 8, 1, &ds->ds_object, tx)); 3114 dsl_fs_ss_count_adjust(hds->ds_dir, 1, 3115 DD_FIELD_SNAPSHOT_COUNT, tx); 3116 3117 /* change containing dsl_dir */ 3118 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3119 ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object); 3120 dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object; 3121 ASSERT3P(ds->ds_dir, ==, odd); 3122 dsl_dir_rele(ds->ds_dir, ds); 3123 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 3124 NULL, ds, &ds->ds_dir)); 3125 3126#if defined(__FreeBSD__) && defined(_KERNEL) 3127 dsl_dataset_name(ds, newname); 3128 zfsvfs_update_fromname(oldname, newname); 3129 zvol_rename_minors(oldname, newname); 3130#endif 3131 3132 /* move any clone references */ 3133 if (dsl_dataset_phys(ds)->ds_next_clones_obj && 3134 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 3135 zap_cursor_t zc; 3136 zap_attribute_t za; 3137 3138 for (zap_cursor_init(&zc, dp->dp_meta_objset, 3139 dsl_dataset_phys(ds)->ds_next_clones_obj); 3140 zap_cursor_retrieve(&zc, &za) == 0; 3141 zap_cursor_advance(&zc)) { 3142 dsl_dataset_t *cnds; 3143 uint64_t o; 3144 3145 if (za.za_first_integer == oldnext_obj) { 3146 /* 3147 * We've already moved the 3148 * origin's reference. 3149 */ 3150 continue; 3151 } 3152 3153 VERIFY0(dsl_dataset_hold_obj(dp, 3154 za.za_first_integer, FTAG, &cnds)); 3155 o = dsl_dir_phys(cnds->ds_dir)-> 3156 dd_head_dataset_obj; 3157 3158 VERIFY0(zap_remove_int(dp->dp_meta_objset, 3159 dsl_dir_phys(odd)->dd_clones, o, tx)); 3160 VERIFY0(zap_add_int(dp->dp_meta_objset, 3161 dsl_dir_phys(dd)->dd_clones, o, tx)); 3162 dsl_dataset_rele(cnds, FTAG); 3163 } 3164 zap_cursor_fini(&zc); 3165 } 3166 3167 ASSERT(!dsl_prop_hascb(ds)); 3168 } 3169 3170#if defined(__FreeBSD__) && defined(_KERNEL) 3171 mutex_exit(&spa_namespace_lock); 3172 3173 kmem_free(newname, MAXPATHLEN); 3174 kmem_free(oldname, MAXPATHLEN); 3175#endif 3176 /* 3177 * Change space accounting. 3178 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 3179 * both be valid, or both be 0 (resulting in delta == 0). This 3180 * is true for each of {clone,origin} independently. 3181 */ 3182 3183 delta = ddpa->cloneusedsnap - 3184 dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]; 3185 ASSERT3S(delta, >=, 0); 3186 ASSERT3U(ddpa->used, >=, delta); 3187 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 3188 dsl_dir_diduse_space(dd, DD_USED_HEAD, 3189 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 3190 3191 delta = ddpa->originusedsnap - 3192 dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP]; 3193 ASSERT3S(delta, <=, 0); 3194 ASSERT3U(ddpa->used, >=, -delta); 3195 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 3196 dsl_dir_diduse_space(odd, DD_USED_HEAD, 3197 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 3198 3199 dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique; 3200 3201 /* log history record */ 3202 spa_history_log_internal_ds(hds, "promote", tx, ""); 3203 3204 dsl_dir_rele(odd, FTAG); 3205 promote_rele(ddpa, FTAG); 3206} 3207 3208/* 3209 * Make a list of dsl_dataset_t's for the snapshots between first_obj 3210 * (exclusive) and last_obj (inclusive). The list will be in reverse 3211 * order (last_obj will be the list_head()). If first_obj == 0, do all 3212 * snapshots back to this dataset's origin. 3213 */ 3214static int 3215snaplist_make(dsl_pool_t *dp, 3216 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 3217{ 3218 uint64_t obj = last_obj; 3219 3220 list_create(l, sizeof (struct promotenode), 3221 offsetof(struct promotenode, link)); 3222 3223 while (obj != first_obj) { 3224 dsl_dataset_t *ds; 3225 struct promotenode *snap; 3226 int err; 3227 3228 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 3229 ASSERT(err != ENOENT); 3230 if (err != 0) 3231 return (err); 3232 3233 if (first_obj == 0) 3234 first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj; 3235 3236 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 3237 snap->ds = ds; 3238 list_insert_tail(l, snap); 3239 obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 3240 } 3241 3242 return (0); 3243} 3244 3245static int 3246snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 3247{ 3248 struct promotenode *snap; 3249 3250 *spacep = 0; 3251 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 3252 uint64_t used, comp, uncomp; 3253 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 3254 mintxg, UINT64_MAX, &used, &comp, &uncomp); 3255 *spacep += used; 3256 } 3257 return (0); 3258} 3259 3260static void 3261snaplist_destroy(list_t *l, void *tag) 3262{ 3263 struct promotenode *snap; 3264 3265 if (l == NULL || !list_link_active(&l->list_head)) 3266 return; 3267 3268 while ((snap = list_tail(l)) != NULL) { 3269 list_remove(l, snap); 3270 dsl_dataset_rele(snap->ds, tag); 3271 kmem_free(snap, sizeof (*snap)); 3272 } 3273 list_destroy(l); 3274} 3275 3276static int 3277promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 3278{ 3279 int error; 3280 dsl_dir_t *dd; 3281 struct promotenode *snap; 3282 3283 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 3284 &ddpa->ddpa_clone); 3285 if (error != 0) 3286 return (error); 3287 dd = ddpa->ddpa_clone->ds_dir; 3288 3289 if (ddpa->ddpa_clone->ds_is_snapshot || 3290 !dsl_dir_is_clone(dd)) { 3291 dsl_dataset_rele(ddpa->ddpa_clone, tag); 3292 return (SET_ERROR(EINVAL)); 3293 } 3294 3295 error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj, 3296 &ddpa->shared_snaps, tag); 3297 if (error != 0) 3298 goto out; 3299 3300 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 3301 &ddpa->clone_snaps, tag); 3302 if (error != 0) 3303 goto out; 3304 3305 snap = list_head(&ddpa->shared_snaps); 3306 ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj); 3307 error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj, 3308 dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj, 3309 &ddpa->origin_snaps, tag); 3310 if (error != 0) 3311 goto out; 3312 3313 if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) { 3314 error = dsl_dataset_hold_obj(dp, 3315 dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj, 3316 tag, &ddpa->origin_origin); 3317 if (error != 0) 3318 goto out; 3319 } 3320out: 3321 if (error != 0) 3322 promote_rele(ddpa, tag); 3323 return (error); 3324} 3325 3326static void 3327promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 3328{ 3329 snaplist_destroy(&ddpa->shared_snaps, tag); 3330 snaplist_destroy(&ddpa->clone_snaps, tag); 3331 snaplist_destroy(&ddpa->origin_snaps, tag); 3332 if (ddpa->origin_origin != NULL) 3333 dsl_dataset_rele(ddpa->origin_origin, tag); 3334 dsl_dataset_rele(ddpa->ddpa_clone, tag); 3335} 3336 3337/* 3338 * Promote a clone. 3339 * 3340 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 3341 * in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.) 3342 */ 3343int 3344dsl_dataset_promote(const char *name, char *conflsnap) 3345{ 3346 dsl_dataset_promote_arg_t ddpa = { 0 }; 3347 uint64_t numsnaps; 3348 int error; 3349 nvpair_t *snap_pair; 3350 objset_t *os; 3351 3352 /* 3353 * We will modify space proportional to the number of 3354 * snapshots. Compute numsnaps. 3355 */ 3356 error = dmu_objset_hold(name, FTAG, &os); 3357 if (error != 0) 3358 return (error); 3359 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 3360 dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj, 3361 &numsnaps); 3362 dmu_objset_rele(os, FTAG); 3363 if (error != 0) 3364 return (error); 3365 3366 ddpa.ddpa_clonename = name; 3367 ddpa.err_ds = fnvlist_alloc(); 3368 ddpa.cr = CRED(); 3369 3370 error = dsl_sync_task(name, dsl_dataset_promote_check, 3371 dsl_dataset_promote_sync, &ddpa, 3372 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED); 3373 3374 /* 3375 * Return the first conflicting snapshot found. 3376 */ 3377 snap_pair = nvlist_next_nvpair(ddpa.err_ds, NULL); 3378 if (snap_pair != NULL && conflsnap != NULL) 3379 (void) strcpy(conflsnap, nvpair_name(snap_pair)); 3380 3381 fnvlist_free(ddpa.err_ds); 3382 return (error); 3383} 3384 3385int 3386dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 3387 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 3388{ 3389 /* 3390 * "slack" factor for received datasets with refquota set on them. 3391 * See the bottom of this function for details on its use. 3392 */ 3393 uint64_t refquota_slack = DMU_MAX_ACCESS * spa_asize_inflation; 3394 int64_t unused_refres_delta; 3395 3396 /* they should both be heads */ 3397 if (clone->ds_is_snapshot || 3398 origin_head->ds_is_snapshot) 3399 return (SET_ERROR(EINVAL)); 3400 3401 /* if we are not forcing, the branch point should be just before them */ 3402 if (!force && clone->ds_prev != origin_head->ds_prev) 3403 return (SET_ERROR(EINVAL)); 3404 3405 /* clone should be the clone (unless they are unrelated) */ 3406 if (clone->ds_prev != NULL && 3407 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 3408 origin_head->ds_dir != clone->ds_prev->ds_dir) 3409 return (SET_ERROR(EINVAL)); 3410 3411 /* the clone should be a child of the origin */ 3412 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 3413 return (SET_ERROR(EINVAL)); 3414 3415 /* origin_head shouldn't be modified unless 'force' */ 3416 if (!force && 3417 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 3418 return (SET_ERROR(ETXTBSY)); 3419 3420 /* origin_head should have no long holds (e.g. is not mounted) */ 3421 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 3422 return (SET_ERROR(EBUSY)); 3423 3424 /* check amount of any unconsumed refreservation */ 3425 unused_refres_delta = 3426 (int64_t)MIN(origin_head->ds_reserved, 3427 dsl_dataset_phys(origin_head)->ds_unique_bytes) - 3428 (int64_t)MIN(origin_head->ds_reserved, 3429 dsl_dataset_phys(clone)->ds_unique_bytes); 3430 3431 if (unused_refres_delta > 0 && 3432 unused_refres_delta > 3433 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 3434 return (SET_ERROR(ENOSPC)); 3435 3436 /* 3437 * The clone can't be too much over the head's refquota. 3438 * 3439 * To ensure that the entire refquota can be used, we allow one 3440 * transaction to exceed the the refquota. Therefore, this check 3441 * needs to also allow for the space referenced to be more than the 3442 * refquota. The maximum amount of space that one transaction can use 3443 * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this 3444 * overage ensures that we are able to receive a filesystem that 3445 * exceeds the refquota on the source system. 3446 * 3447 * So that overage is the refquota_slack we use below. 3448 */ 3449 if (origin_head->ds_quota != 0 && 3450 dsl_dataset_phys(clone)->ds_referenced_bytes > 3451 origin_head->ds_quota + refquota_slack) 3452 return (SET_ERROR(EDQUOT)); 3453 3454 return (0); 3455} 3456 3457static void 3458dsl_dataset_swap_remap_deadlists(dsl_dataset_t *clone, 3459 dsl_dataset_t *origin, dmu_tx_t *tx) 3460{ 3461 uint64_t clone_remap_dl_obj, origin_remap_dl_obj; 3462 dsl_pool_t *dp = dmu_tx_pool(tx); 3463 3464 ASSERT(dsl_pool_sync_context(dp)); 3465 3466 clone_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(clone); 3467 origin_remap_dl_obj = dsl_dataset_get_remap_deadlist_object(origin); 3468 3469 if (clone_remap_dl_obj != 0) { 3470 dsl_deadlist_close(&clone->ds_remap_deadlist); 3471 dsl_dataset_unset_remap_deadlist_object(clone, tx); 3472 } 3473 if (origin_remap_dl_obj != 0) { 3474 dsl_deadlist_close(&origin->ds_remap_deadlist); 3475 dsl_dataset_unset_remap_deadlist_object(origin, tx); 3476 } 3477 3478 if (clone_remap_dl_obj != 0) { 3479 dsl_dataset_set_remap_deadlist_object(origin, 3480 clone_remap_dl_obj, tx); 3481 dsl_deadlist_open(&origin->ds_remap_deadlist, 3482 dp->dp_meta_objset, clone_remap_dl_obj); 3483 } 3484 if (origin_remap_dl_obj != 0) { 3485 dsl_dataset_set_remap_deadlist_object(clone, 3486 origin_remap_dl_obj, tx); 3487 dsl_deadlist_open(&clone->ds_remap_deadlist, 3488 dp->dp_meta_objset, origin_remap_dl_obj); 3489 } 3490} 3491 3492void 3493dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 3494 dsl_dataset_t *origin_head, dmu_tx_t *tx) 3495{ 3496 dsl_pool_t *dp = dmu_tx_pool(tx); 3497 int64_t unused_refres_delta; 3498 3499 ASSERT(clone->ds_reserved == 0); 3500 /* 3501 * NOTE: On DEBUG kernels there could be a race between this and 3502 * the check function if spa_asize_inflation is adjusted... 3503 */ 3504 ASSERT(origin_head->ds_quota == 0 || 3505 dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota + 3506 DMU_MAX_ACCESS * spa_asize_inflation); 3507 ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 3508 3509 /* 3510 * Swap per-dataset feature flags. 3511 */ 3512 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 3513 if (!(spa_feature_table[f].fi_flags & 3514 ZFEATURE_FLAG_PER_DATASET)) { 3515 ASSERT(!clone->ds_feature_inuse[f]); 3516 ASSERT(!origin_head->ds_feature_inuse[f]); 3517 continue; 3518 } 3519 3520 boolean_t clone_inuse = clone->ds_feature_inuse[f]; 3521 boolean_t origin_head_inuse = origin_head->ds_feature_inuse[f]; 3522 3523 if (clone_inuse) { 3524 dsl_dataset_deactivate_feature(clone->ds_object, f, tx); 3525 clone->ds_feature_inuse[f] = B_FALSE; 3526 } 3527 if (origin_head_inuse) { 3528 dsl_dataset_deactivate_feature(origin_head->ds_object, 3529 f, tx); 3530 origin_head->ds_feature_inuse[f] = B_FALSE; 3531 } 3532 if (clone_inuse) { 3533 dsl_dataset_activate_feature(origin_head->ds_object, 3534 f, tx); 3535 origin_head->ds_feature_inuse[f] = B_TRUE; 3536 } 3537 if (origin_head_inuse) { 3538 dsl_dataset_activate_feature(clone->ds_object, f, tx); 3539 clone->ds_feature_inuse[f] = B_TRUE; 3540 } 3541 } 3542 3543 dmu_buf_will_dirty(clone->ds_dbuf, tx); 3544 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 3545 3546 if (clone->ds_objset != NULL) { 3547 dmu_objset_evict(clone->ds_objset); 3548 clone->ds_objset = NULL; 3549 } 3550 3551 if (origin_head->ds_objset != NULL) { 3552 dmu_objset_evict(origin_head->ds_objset); 3553 origin_head->ds_objset = NULL; 3554 } 3555 3556 unused_refres_delta = 3557 (int64_t)MIN(origin_head->ds_reserved, 3558 dsl_dataset_phys(origin_head)->ds_unique_bytes) - 3559 (int64_t)MIN(origin_head->ds_reserved, 3560 dsl_dataset_phys(clone)->ds_unique_bytes); 3561 3562 /* 3563 * Reset origin's unique bytes, if it exists. 3564 */ 3565 if (clone->ds_prev) { 3566 dsl_dataset_t *origin = clone->ds_prev; 3567 uint64_t comp, uncomp; 3568 3569 dmu_buf_will_dirty(origin->ds_dbuf, tx); 3570 dsl_deadlist_space_range(&clone->ds_deadlist, 3571 dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX, 3572 &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp); 3573 } 3574 3575 /* swap blkptrs */ 3576 { 3577 rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG); 3578 rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG); 3579 blkptr_t tmp; 3580 tmp = dsl_dataset_phys(origin_head)->ds_bp; 3581 dsl_dataset_phys(origin_head)->ds_bp = 3582 dsl_dataset_phys(clone)->ds_bp; 3583 dsl_dataset_phys(clone)->ds_bp = tmp; 3584 rrw_exit(&origin_head->ds_bp_rwlock, FTAG); 3585 rrw_exit(&clone->ds_bp_rwlock, FTAG); 3586 } 3587 3588 /* set dd_*_bytes */ 3589 { 3590 int64_t dused, dcomp, duncomp; 3591 uint64_t cdl_used, cdl_comp, cdl_uncomp; 3592 uint64_t odl_used, odl_comp, odl_uncomp; 3593 3594 ASSERT3U(dsl_dir_phys(clone->ds_dir)-> 3595 dd_used_breakdown[DD_USED_SNAP], ==, 0); 3596 3597 dsl_deadlist_space(&clone->ds_deadlist, 3598 &cdl_used, &cdl_comp, &cdl_uncomp); 3599 dsl_deadlist_space(&origin_head->ds_deadlist, 3600 &odl_used, &odl_comp, &odl_uncomp); 3601 3602 dused = dsl_dataset_phys(clone)->ds_referenced_bytes + 3603 cdl_used - 3604 (dsl_dataset_phys(origin_head)->ds_referenced_bytes + 3605 odl_used); 3606 dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes + 3607 cdl_comp - 3608 (dsl_dataset_phys(origin_head)->ds_compressed_bytes + 3609 odl_comp); 3610 duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes + 3611 cdl_uncomp - 3612 (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes + 3613 odl_uncomp); 3614 3615 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 3616 dused, dcomp, duncomp, tx); 3617 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 3618 -dused, -dcomp, -duncomp, tx); 3619 3620 /* 3621 * The difference in the space used by snapshots is the 3622 * difference in snapshot space due to the head's 3623 * deadlist (since that's the only thing that's 3624 * changing that affects the snapused). 3625 */ 3626 dsl_deadlist_space_range(&clone->ds_deadlist, 3627 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 3628 &cdl_used, &cdl_comp, &cdl_uncomp); 3629 dsl_deadlist_space_range(&origin_head->ds_deadlist, 3630 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 3631 &odl_used, &odl_comp, &odl_uncomp); 3632 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 3633 DD_USED_HEAD, DD_USED_SNAP, NULL); 3634 } 3635 3636 /* swap ds_*_bytes */ 3637 SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes, 3638 dsl_dataset_phys(clone)->ds_referenced_bytes); 3639 SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes, 3640 dsl_dataset_phys(clone)->ds_compressed_bytes); 3641 SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes, 3642 dsl_dataset_phys(clone)->ds_uncompressed_bytes); 3643 SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes, 3644 dsl_dataset_phys(clone)->ds_unique_bytes); 3645 3646 /* apply any parent delta for change in unconsumed refreservation */ 3647 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 3648 unused_refres_delta, 0, 0, tx); 3649 3650 /* 3651 * Swap deadlists. 3652 */ 3653 dsl_deadlist_close(&clone->ds_deadlist); 3654 dsl_deadlist_close(&origin_head->ds_deadlist); 3655 SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj, 3656 dsl_dataset_phys(clone)->ds_deadlist_obj); 3657 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 3658 dsl_dataset_phys(clone)->ds_deadlist_obj); 3659 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 3660 dsl_dataset_phys(origin_head)->ds_deadlist_obj); 3661 dsl_dataset_swap_remap_deadlists(clone, origin_head, tx); 3662 3663 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 3664 3665 spa_history_log_internal_ds(clone, "clone swap", tx, 3666 "parent=%s", origin_head->ds_dir->dd_myname); 3667} 3668 3669/* 3670 * Given a pool name and a dataset object number in that pool, 3671 * return the name of that dataset. 3672 */ 3673int 3674dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 3675{ 3676 dsl_pool_t *dp; 3677 dsl_dataset_t *ds; 3678 int error; 3679 3680 error = dsl_pool_hold(pname, FTAG, &dp); 3681 if (error != 0) 3682 return (error); 3683 3684 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 3685 if (error == 0) { 3686 dsl_dataset_name(ds, buf); 3687 dsl_dataset_rele(ds, FTAG); 3688 } 3689 dsl_pool_rele(dp, FTAG); 3690 3691 return (error); 3692} 3693 3694int 3695dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 3696 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 3697{ 3698 int error = 0; 3699 3700 ASSERT3S(asize, >, 0); 3701 3702 /* 3703 * *ref_rsrv is the portion of asize that will come from any 3704 * unconsumed refreservation space. 3705 */ 3706 *ref_rsrv = 0; 3707 3708 mutex_enter(&ds->ds_lock); 3709 /* 3710 * Make a space adjustment for reserved bytes. 3711 */ 3712 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { 3713 ASSERT3U(*used, >=, 3714 ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 3715 *used -= 3716 (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 3717 *ref_rsrv = 3718 asize - MIN(asize, parent_delta(ds, asize + inflight)); 3719 } 3720 3721 if (!check_quota || ds->ds_quota == 0) { 3722 mutex_exit(&ds->ds_lock); 3723 return (0); 3724 } 3725 /* 3726 * If they are requesting more space, and our current estimate 3727 * is over quota, they get to try again unless the actual 3728 * on-disk is over quota and there are no pending changes (which 3729 * may free up space for us). 3730 */ 3731 if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >= 3732 ds->ds_quota) { 3733 if (inflight > 0 || 3734 dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota) 3735 error = SET_ERROR(ERESTART); 3736 else 3737 error = SET_ERROR(EDQUOT); 3738 } 3739 mutex_exit(&ds->ds_lock); 3740 3741 return (error); 3742} 3743 3744typedef struct dsl_dataset_set_qr_arg { 3745 const char *ddsqra_name; 3746 zprop_source_t ddsqra_source; 3747 uint64_t ddsqra_value; 3748} dsl_dataset_set_qr_arg_t; 3749 3750 3751/* ARGSUSED */ 3752static int 3753dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 3754{ 3755 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3756 dsl_pool_t *dp = dmu_tx_pool(tx); 3757 dsl_dataset_t *ds; 3758 int error; 3759 uint64_t newval; 3760 3761 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 3762 return (SET_ERROR(ENOTSUP)); 3763 3764 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 3765 if (error != 0) 3766 return (error); 3767 3768 if (ds->ds_is_snapshot) { 3769 dsl_dataset_rele(ds, FTAG); 3770 return (SET_ERROR(EINVAL)); 3771 } 3772 3773 error = dsl_prop_predict(ds->ds_dir, 3774 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 3775 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 3776 if (error != 0) { 3777 dsl_dataset_rele(ds, FTAG); 3778 return (error); 3779 } 3780 3781 if (newval == 0) { 3782 dsl_dataset_rele(ds, FTAG); 3783 return (0); 3784 } 3785 3786 if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes || 3787 newval < ds->ds_reserved) { 3788 dsl_dataset_rele(ds, FTAG); 3789 return (SET_ERROR(ENOSPC)); 3790 } 3791 3792 dsl_dataset_rele(ds, FTAG); 3793 return (0); 3794} 3795 3796static void 3797dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 3798{ 3799 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3800 dsl_pool_t *dp = dmu_tx_pool(tx); 3801 dsl_dataset_t *ds; 3802 uint64_t newval; 3803 3804 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3805 3806 dsl_prop_set_sync_impl(ds, 3807 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 3808 ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 3809 &ddsqra->ddsqra_value, tx); 3810 3811 VERIFY0(dsl_prop_get_int_ds(ds, 3812 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 3813 3814 if (ds->ds_quota != newval) { 3815 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3816 ds->ds_quota = newval; 3817 } 3818 dsl_dataset_rele(ds, FTAG); 3819} 3820 3821int 3822dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 3823 uint64_t refquota) 3824{ 3825 dsl_dataset_set_qr_arg_t ddsqra; 3826 3827 ddsqra.ddsqra_name = dsname; 3828 ddsqra.ddsqra_source = source; 3829 ddsqra.ddsqra_value = refquota; 3830 3831 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 3832 dsl_dataset_set_refquota_sync, &ddsqra, 0, 3833 ZFS_SPACE_CHECK_EXTRA_RESERVED)); 3834} 3835 3836static int 3837dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 3838{ 3839 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3840 dsl_pool_t *dp = dmu_tx_pool(tx); 3841 dsl_dataset_t *ds; 3842 int error; 3843 uint64_t newval, unique; 3844 3845 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 3846 return (SET_ERROR(ENOTSUP)); 3847 3848 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 3849 if (error != 0) 3850 return (error); 3851 3852 if (ds->ds_is_snapshot) { 3853 dsl_dataset_rele(ds, FTAG); 3854 return (SET_ERROR(EINVAL)); 3855 } 3856 3857 error = dsl_prop_predict(ds->ds_dir, 3858 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3859 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 3860 if (error != 0) { 3861 dsl_dataset_rele(ds, FTAG); 3862 return (error); 3863 } 3864 3865 /* 3866 * If we are doing the preliminary check in open context, the 3867 * space estimates may be inaccurate. 3868 */ 3869 if (!dmu_tx_is_syncing(tx)) { 3870 dsl_dataset_rele(ds, FTAG); 3871 return (0); 3872 } 3873 3874 mutex_enter(&ds->ds_lock); 3875 if (!DS_UNIQUE_IS_ACCURATE(ds)) 3876 dsl_dataset_recalc_head_uniq(ds); 3877 unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3878 mutex_exit(&ds->ds_lock); 3879 3880 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 3881 uint64_t delta = MAX(unique, newval) - 3882 MAX(unique, ds->ds_reserved); 3883 3884 if (delta > 3885 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 3886 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 3887 dsl_dataset_rele(ds, FTAG); 3888 return (SET_ERROR(ENOSPC)); 3889 } 3890 } 3891 3892 dsl_dataset_rele(ds, FTAG); 3893 return (0); 3894} 3895 3896void 3897dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 3898 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 3899{ 3900 uint64_t newval; 3901 uint64_t unique; 3902 int64_t delta; 3903 3904 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3905 source, sizeof (value), 1, &value, tx); 3906 3907 VERIFY0(dsl_prop_get_int_ds(ds, 3908 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 3909 3910 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3911 mutex_enter(&ds->ds_dir->dd_lock); 3912 mutex_enter(&ds->ds_lock); 3913 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3914 unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3915 delta = MAX(0, (int64_t)(newval - unique)) - 3916 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3917 ds->ds_reserved = newval; 3918 mutex_exit(&ds->ds_lock); 3919 3920 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3921 mutex_exit(&ds->ds_dir->dd_lock); 3922} 3923 3924static void 3925dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 3926{ 3927 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3928 dsl_pool_t *dp = dmu_tx_pool(tx); 3929 dsl_dataset_t *ds; 3930 3931 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3932 dsl_dataset_set_refreservation_sync_impl(ds, 3933 ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); 3934 dsl_dataset_rele(ds, FTAG); 3935} 3936 3937int 3938dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 3939 uint64_t refreservation) 3940{ 3941 dsl_dataset_set_qr_arg_t ddsqra; 3942 3943 ddsqra.ddsqra_name = dsname; 3944 ddsqra.ddsqra_source = source; 3945 ddsqra.ddsqra_value = refreservation; 3946 3947 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 3948 dsl_dataset_set_refreservation_sync, &ddsqra, 0, 3949 ZFS_SPACE_CHECK_EXTRA_RESERVED)); 3950} 3951 3952/* 3953 * Return (in *usedp) the amount of space written in new that is not 3954 * present in oldsnap. New may be a snapshot or the head. Old must be 3955 * a snapshot before new, in new's filesystem (or its origin). If not then 3956 * fail and return EINVAL. 3957 * 3958 * The written space is calculated by considering two components: First, we 3959 * ignore any freed space, and calculate the written as new's used space 3960 * minus old's used space. Next, we add in the amount of space that was freed 3961 * between the two snapshots, thus reducing new's used space relative to old's. 3962 * Specifically, this is the space that was born before old->ds_creation_txg, 3963 * and freed before new (ie. on new's deadlist or a previous deadlist). 3964 * 3965 * space freed [---------------------] 3966 * snapshots ---O-------O--------O-------O------ 3967 * oldsnap new 3968 */ 3969int 3970dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 3971 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3972{ 3973 int err = 0; 3974 uint64_t snapobj; 3975 dsl_pool_t *dp = new->ds_dir->dd_pool; 3976 3977 ASSERT(dsl_pool_config_held(dp)); 3978 3979 *usedp = 0; 3980 *usedp += dsl_dataset_phys(new)->ds_referenced_bytes; 3981 *usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes; 3982 3983 *compp = 0; 3984 *compp += dsl_dataset_phys(new)->ds_compressed_bytes; 3985 *compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes; 3986 3987 *uncompp = 0; 3988 *uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes; 3989 *uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes; 3990 3991 snapobj = new->ds_object; 3992 while (snapobj != oldsnap->ds_object) { 3993 dsl_dataset_t *snap; 3994 uint64_t used, comp, uncomp; 3995 3996 if (snapobj == new->ds_object) { 3997 snap = new; 3998 } else { 3999 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 4000 if (err != 0) 4001 break; 4002 } 4003 4004 if (dsl_dataset_phys(snap)->ds_prev_snap_txg == 4005 dsl_dataset_phys(oldsnap)->ds_creation_txg) { 4006 /* 4007 * The blocks in the deadlist can not be born after 4008 * ds_prev_snap_txg, so get the whole deadlist space, 4009 * which is more efficient (especially for old-format 4010 * deadlists). Unfortunately the deadlist code 4011 * doesn't have enough information to make this 4012 * optimization itself. 4013 */ 4014 dsl_deadlist_space(&snap->ds_deadlist, 4015 &used, &comp, &uncomp); 4016 } else { 4017 dsl_deadlist_space_range(&snap->ds_deadlist, 4018 0, dsl_dataset_phys(oldsnap)->ds_creation_txg, 4019 &used, &comp, &uncomp); 4020 } 4021 *usedp += used; 4022 *compp += comp; 4023 *uncompp += uncomp; 4024 4025 /* 4026 * If we get to the beginning of the chain of snapshots 4027 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 4028 * was not a snapshot of/before new. 4029 */ 4030 snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj; 4031 if (snap != new) 4032 dsl_dataset_rele(snap, FTAG); 4033 if (snapobj == 0) { 4034 err = SET_ERROR(EINVAL); 4035 break; 4036 } 4037 4038 } 4039 return (err); 4040} 4041 4042/* 4043 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 4044 * lastsnap, and all snapshots in between are deleted. 4045 * 4046 * blocks that would be freed [---------------------------] 4047 * snapshots ---O-------O--------O-------O--------O 4048 * firstsnap lastsnap 4049 * 4050 * This is the set of blocks that were born after the snap before firstsnap, 4051 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 4052 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 4053 * We calculate this by iterating over the relevant deadlists (from the snap 4054 * after lastsnap, backward to the snap after firstsnap), summing up the 4055 * space on the deadlist that was born after the snap before firstsnap. 4056 */ 4057int 4058dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 4059 dsl_dataset_t *lastsnap, 4060 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 4061{ 4062 int err = 0; 4063 uint64_t snapobj; 4064 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 4065 4066 ASSERT(firstsnap->ds_is_snapshot); 4067 ASSERT(lastsnap->ds_is_snapshot); 4068 4069 /* 4070 * Check that the snapshots are in the same dsl_dir, and firstsnap 4071 * is before lastsnap. 4072 */ 4073 if (firstsnap->ds_dir != lastsnap->ds_dir || 4074 dsl_dataset_phys(firstsnap)->ds_creation_txg > 4075 dsl_dataset_phys(lastsnap)->ds_creation_txg) 4076 return (SET_ERROR(EINVAL)); 4077 4078 *usedp = *compp = *uncompp = 0; 4079 4080 snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj; 4081 while (snapobj != firstsnap->ds_object) { 4082 dsl_dataset_t *ds; 4083 uint64_t used, comp, uncomp; 4084 4085 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 4086 if (err != 0) 4087 break; 4088 4089 dsl_deadlist_space_range(&ds->ds_deadlist, 4090 dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX, 4091 &used, &comp, &uncomp); 4092 *usedp += used; 4093 *compp += comp; 4094 *uncompp += uncomp; 4095 4096 snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 4097 ASSERT3U(snapobj, !=, 0); 4098 dsl_dataset_rele(ds, FTAG); 4099 } 4100 return (err); 4101} 4102 4103/* 4104 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 4105 * For example, they could both be snapshots of the same filesystem, and 4106 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 4107 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 4108 * filesystem. Or 'earlier' could be the origin's origin. 4109 * 4110 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 4111 */ 4112boolean_t 4113dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 4114 uint64_t earlier_txg) 4115{ 4116 dsl_pool_t *dp = later->ds_dir->dd_pool; 4117 int error; 4118 boolean_t ret; 4119 4120 ASSERT(dsl_pool_config_held(dp)); 4121 ASSERT(earlier->ds_is_snapshot || earlier_txg != 0); 4122 4123 if (earlier_txg == 0) 4124 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; 4125 4126 if (later->ds_is_snapshot && 4127 earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) 4128 return (B_FALSE); 4129 4130 if (later->ds_dir == earlier->ds_dir) 4131 return (B_TRUE); 4132 if (!dsl_dir_is_clone(later->ds_dir)) 4133 return (B_FALSE); 4134 4135 if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object) 4136 return (B_TRUE); 4137 dsl_dataset_t *origin; 4138 error = dsl_dataset_hold_obj(dp, 4139 dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin); 4140 if (error != 0) 4141 return (B_FALSE); 4142 ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 4143 dsl_dataset_rele(origin, FTAG); 4144 return (ret); 4145} 4146 4147void 4148dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 4149{ 4150 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 4151 dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 4152} 4153 4154boolean_t 4155dsl_dataset_is_zapified(dsl_dataset_t *ds) 4156{ 4157 dmu_object_info_t doi; 4158 4159 dmu_object_info_from_db(ds->ds_dbuf, &doi); 4160 return (doi.doi_type == DMU_OTN_ZAP_METADATA); 4161} 4162 4163boolean_t 4164dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds) 4165{ 4166 return (dsl_dataset_is_zapified(ds) && 4167 zap_contains(ds->ds_dir->dd_pool->dp_meta_objset, 4168 ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0); 4169} 4170 4171uint64_t 4172dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds) 4173{ 4174 uint64_t remap_deadlist_obj; 4175 int err; 4176 4177 if (!dsl_dataset_is_zapified(ds)) 4178 return (0); 4179 4180 err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object, 4181 DS_FIELD_REMAP_DEADLIST, sizeof (remap_deadlist_obj), 1, 4182 &remap_deadlist_obj); 4183 4184 if (err != 0) { 4185 VERIFY3S(err, ==, ENOENT); 4186 return (0); 4187 } 4188 4189 ASSERT(remap_deadlist_obj != 0); 4190 return (remap_deadlist_obj); 4191} 4192 4193boolean_t 4194dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds) 4195{ 4196 EQUIV(dsl_deadlist_is_open(&ds->ds_remap_deadlist), 4197 dsl_dataset_get_remap_deadlist_object(ds) != 0); 4198 return (dsl_deadlist_is_open(&ds->ds_remap_deadlist)); 4199} 4200 4201static void 4202dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds, uint64_t obj, 4203 dmu_tx_t *tx) 4204{ 4205 ASSERT(obj != 0); 4206 dsl_dataset_zapify(ds, tx); 4207 VERIFY0(zap_add(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object, 4208 DS_FIELD_REMAP_DEADLIST, sizeof (obj), 1, &obj, tx)); 4209} 4210 4211static void 4212dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds, dmu_tx_t *tx) 4213{ 4214 VERIFY0(zap_remove(ds->ds_dir->dd_pool->dp_meta_objset, 4215 ds->ds_object, DS_FIELD_REMAP_DEADLIST, tx)); 4216} 4217 4218void 4219dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx) 4220{ 4221 uint64_t remap_deadlist_object; 4222 spa_t *spa = ds->ds_dir->dd_pool->dp_spa; 4223 4224 ASSERT(dmu_tx_is_syncing(tx)); 4225 ASSERT(dsl_dataset_remap_deadlist_exists(ds)); 4226 4227 remap_deadlist_object = ds->ds_remap_deadlist.dl_object; 4228 dsl_deadlist_close(&ds->ds_remap_deadlist); 4229 dsl_deadlist_free(spa_meta_objset(spa), remap_deadlist_object, tx); 4230 dsl_dataset_unset_remap_deadlist_object(ds, tx); 4231 spa_feature_decr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 4232} 4233 4234void 4235dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx) 4236{ 4237 uint64_t remap_deadlist_obj; 4238 spa_t *spa = ds->ds_dir->dd_pool->dp_spa; 4239 4240 ASSERT(dmu_tx_is_syncing(tx)); 4241 ASSERT(MUTEX_HELD(&ds->ds_remap_deadlist_lock)); 4242 /* 4243 * Currently we only create remap deadlists when there are indirect 4244 * vdevs with referenced mappings. 4245 */ 4246 ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL)); 4247 4248 remap_deadlist_obj = dsl_deadlist_clone( 4249 &ds->ds_deadlist, UINT64_MAX, 4250 dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); 4251 dsl_dataset_set_remap_deadlist_object(ds, 4252 remap_deadlist_obj, tx); 4253 dsl_deadlist_open(&ds->ds_remap_deadlist, spa_meta_objset(spa), 4254 remap_deadlist_obj); 4255 spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 4256} 4257