/* dsl_dataset.c revision 288539 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org> 24 * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 25 * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26 * Copyright (c) 2014 RackTop Systems. 27 */ 28 29#include <sys/dmu_objset.h> 30#include <sys/dsl_dataset.h> 31#include <sys/dsl_dir.h> 32#include <sys/dsl_prop.h> 33#include <sys/dsl_synctask.h> 34#include <sys/dmu_traverse.h> 35#include <sys/dmu_impl.h> 36#include <sys/dmu_tx.h> 37#include <sys/arc.h> 38#include <sys/zio.h> 39#include <sys/zap.h> 40#include <sys/zfeature.h> 41#include <sys/unique.h> 42#include <sys/zfs_context.h> 43#include <sys/zfs_ioctl.h> 44#include <sys/spa.h> 45#include <sys/zfs_znode.h> 46#include <sys/zfs_onexit.h> 47#include <sys/zvol.h> 48#include <sys/dsl_scan.h> 49#include <sys/dsl_deadlist.h> 50#include <sys/dsl_destroy.h> 51#include <sys/dsl_userhold.h> 52#include <sys/dsl_bookmark.h> 53 54SYSCTL_DECL(_vfs_zfs); 55 56/* 57 * The SPA supports block sizes up to 16MB. 
However, very large blocks 58 * can have an impact on i/o latency (e.g. tying up a spinning disk for 59 * ~300ms), and also potentially on the memory allocator. Therefore, 60 * we do not allow the recordsize to be set larger than zfs_max_recordsize 61 * (default 1MB). Larger blocks can be created by changing this tunable, 62 * and pools with larger blocks can always be imported and used, regardless 63 * of this setting. 64 */ 65int zfs_max_recordsize = 1 * 1024 * 1024; 66SYSCTL_INT(_vfs_zfs, OID_AUTO, max_recordsize, CTLFLAG_RWTUN, 67 &zfs_max_recordsize, 0, 68 "Maximum block size. Expect dragons when tuning this."); 69 70#define SWITCH64(x, y) \ 71 { \ 72 uint64_t __tmp = (x); \ 73 (x) = (y); \ 74 (y) = __tmp; \ 75 } 76 77#define DS_REF_MAX (1ULL << 62) 78 79extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); 80extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds); 81 82/* 83 * Figure out how much of this delta should be propogated to the dsl_dir 84 * layer. If there's a refreservation, that space has already been 85 * partially accounted for in our ancestors. 
86 */ 87static int64_t 88parent_delta(dsl_dataset_t *ds, int64_t delta) 89{ 90 dsl_dataset_phys_t *ds_phys; 91 uint64_t old_bytes, new_bytes; 92 93 if (ds->ds_reserved == 0) 94 return (delta); 95 96 ds_phys = dsl_dataset_phys(ds); 97 old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved); 98 new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 99 100 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 101 return (new_bytes - old_bytes); 102} 103 104void 105dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 106{ 107 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 108 int compressed = BP_GET_PSIZE(bp); 109 int uncompressed = BP_GET_UCSIZE(bp); 110 int64_t delta; 111 112 dprintf_bp(bp, "ds=%p", ds); 113 114 ASSERT(dmu_tx_is_syncing(tx)); 115 /* It could have been compressed away to nothing */ 116 if (BP_IS_HOLE(bp)) 117 return; 118 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 119 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 120 if (ds == NULL) { 121 dsl_pool_mos_diduse_space(tx->tx_pool, 122 used, compressed, uncompressed); 123 return; 124 } 125 126 dmu_buf_will_dirty(ds->ds_dbuf, tx); 127 mutex_enter(&ds->ds_lock); 128 delta = parent_delta(ds, used); 129 dsl_dataset_phys(ds)->ds_referenced_bytes += used; 130 dsl_dataset_phys(ds)->ds_compressed_bytes += compressed; 131 dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed; 132 dsl_dataset_phys(ds)->ds_unique_bytes += used; 133 if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) 134 ds->ds_need_large_blocks = B_TRUE; 135 mutex_exit(&ds->ds_lock); 136 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 137 compressed, uncompressed, tx); 138 dsl_dir_transfer_space(ds->ds_dir, used - delta, 139 DD_USED_REFRSRV, DD_USED_HEAD, NULL); 140} 141 142int 143dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 144 boolean_t async) 145{ 146 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 147 int compressed = BP_GET_PSIZE(bp); 148 int 
uncompressed = BP_GET_UCSIZE(bp); 149 150 if (BP_IS_HOLE(bp)) 151 return (0); 152 153 ASSERT(dmu_tx_is_syncing(tx)); 154 ASSERT(bp->blk_birth <= tx->tx_txg); 155 156 if (ds == NULL) { 157 dsl_free(tx->tx_pool, tx->tx_txg, bp); 158 dsl_pool_mos_diduse_space(tx->tx_pool, 159 -used, -compressed, -uncompressed); 160 return (used); 161 } 162 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 163 164 ASSERT(!dsl_dataset_is_snapshot(ds)); 165 dmu_buf_will_dirty(ds->ds_dbuf, tx); 166 167 if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 168 int64_t delta; 169 170 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 171 dsl_free(tx->tx_pool, tx->tx_txg, bp); 172 173 mutex_enter(&ds->ds_lock); 174 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used || 175 !DS_UNIQUE_IS_ACCURATE(ds)); 176 delta = parent_delta(ds, -used); 177 dsl_dataset_phys(ds)->ds_unique_bytes -= used; 178 mutex_exit(&ds->ds_lock); 179 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 180 delta, -compressed, -uncompressed, tx); 181 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 182 DD_USED_REFRSRV, DD_USED_HEAD, NULL); 183 } else { 184 dprintf_bp(bp, "putting on dead list: %s", ""); 185 if (async) { 186 /* 187 * We are here as part of zio's write done callback, 188 * which means we're a zio interrupt thread. We can't 189 * call dsl_deadlist_insert() now because it may block 190 * waiting for I/O. Instead, put bp on the deferred 191 * queue and let dsl_pool_sync() finish the job. 
192 */ 193 bplist_append(&ds->ds_pending_deadlist, bp); 194 } else { 195 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 196 } 197 ASSERT3U(ds->ds_prev->ds_object, ==, 198 dsl_dataset_phys(ds)->ds_prev_snap_obj); 199 ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); 200 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 201 if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 202 ds->ds_object && bp->blk_birth > 203 dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { 204 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 205 mutex_enter(&ds->ds_prev->ds_lock); 206 dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; 207 mutex_exit(&ds->ds_prev->ds_lock); 208 } 209 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 210 dsl_dir_transfer_space(ds->ds_dir, used, 211 DD_USED_HEAD, DD_USED_SNAP, tx); 212 } 213 } 214 mutex_enter(&ds->ds_lock); 215 ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used); 216 dsl_dataset_phys(ds)->ds_referenced_bytes -= used; 217 ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed); 218 dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed; 219 ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed); 220 dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed; 221 mutex_exit(&ds->ds_lock); 222 223 return (used); 224} 225 226uint64_t 227dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 228{ 229 uint64_t trysnap = 0; 230 231 if (ds == NULL) 232 return (0); 233 /* 234 * The snapshot creation could fail, but that would cause an 235 * incorrect FALSE return, which would only result in an 236 * overestimation of the amount of space that an operation would 237 * consume, which is OK. 238 * 239 * There's also a small window where we could miss a pending 240 * snapshot, because we could set the sync task in the quiescing 241 * phase. So this should only be used as a guess. 
242 */ 243 if (ds->ds_trysnap_txg > 244 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 245 trysnap = ds->ds_trysnap_txg; 246 return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap)); 247} 248 249boolean_t 250dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 251 uint64_t blk_birth) 252{ 253 if (blk_birth <= dsl_dataset_prev_snap_txg(ds) || 254 (bp != NULL && BP_IS_HOLE(bp))) 255 return (B_FALSE); 256 257 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 258 259 return (B_TRUE); 260} 261 262/* ARGSUSED */ 263static void 264dsl_dataset_evict(dmu_buf_t *db, void *dsv) 265{ 266 dsl_dataset_t *ds = dsv; 267 268 ASSERT(ds->ds_owner == NULL); 269 270 unique_remove(ds->ds_fsid_guid); 271 272 if (ds->ds_objset != NULL) 273 dmu_objset_evict(ds->ds_objset); 274 275 if (ds->ds_prev) { 276 dsl_dataset_rele(ds->ds_prev, ds); 277 ds->ds_prev = NULL; 278 } 279 280 bplist_destroy(&ds->ds_pending_deadlist); 281 if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0) 282 dsl_deadlist_close(&ds->ds_deadlist); 283 if (ds->ds_dir) 284 dsl_dir_rele(ds->ds_dir, ds); 285 286 ASSERT(!list_link_active(&ds->ds_synced_link)); 287 288 if (mutex_owned(&ds->ds_lock)) 289 mutex_exit(&ds->ds_lock); 290 mutex_destroy(&ds->ds_lock); 291 if (mutex_owned(&ds->ds_opening_lock)) 292 mutex_exit(&ds->ds_opening_lock); 293 mutex_destroy(&ds->ds_opening_lock); 294 mutex_destroy(&ds->ds_sendstream_lock); 295 refcount_destroy(&ds->ds_longholds); 296 297 kmem_free(ds, sizeof (dsl_dataset_t)); 298} 299 300int 301dsl_dataset_get_snapname(dsl_dataset_t *ds) 302{ 303 dsl_dataset_phys_t *headphys; 304 int err; 305 dmu_buf_t *headdbuf; 306 dsl_pool_t *dp = ds->ds_dir->dd_pool; 307 objset_t *mos = dp->dp_meta_objset; 308 309 if (ds->ds_snapname[0]) 310 return (0); 311 if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) 312 return (0); 313 314 err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, 315 FTAG, &headdbuf); 316 if (err != 0) 317 return (err); 318 headphys = headdbuf->db_data; 319 err 
= zap_value_search(dp->dp_meta_objset, 320 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 321 dmu_buf_rele(headdbuf, FTAG); 322 return (err); 323} 324 325int 326dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 327{ 328 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 329 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 330 matchtype_t mt; 331 int err; 332 333 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 334 mt = MT_FIRST; 335 else 336 mt = MT_EXACT; 337 338 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 339 value, mt, NULL, 0, NULL); 340 if (err == ENOTSUP && mt == MT_FIRST) 341 err = zap_lookup(mos, snapobj, name, 8, 1, value); 342 return (err); 343} 344 345int 346dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, 347 boolean_t adj_cnt) 348{ 349 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 350 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 351 matchtype_t mt; 352 int err; 353 354 dsl_dir_snap_cmtime_update(ds->ds_dir); 355 356 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 357 mt = MT_FIRST; 358 else 359 mt = MT_EXACT; 360 361 err = zap_remove_norm(mos, snapobj, name, mt, tx); 362 if (err == ENOTSUP && mt == MT_FIRST) 363 err = zap_remove(mos, snapobj, name, tx); 364 365 if (err == 0 && adj_cnt) 366 dsl_fs_ss_count_adjust(ds->ds_dir, -1, 367 DD_FIELD_SNAPSHOT_COUNT, tx); 368 369 return (err); 370} 371 372boolean_t 373dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag) 374{ 375 dmu_buf_t *dbuf = ds->ds_dbuf; 376 boolean_t result = B_FALSE; 377 378 if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset, 379 ds->ds_object, DMU_BONUS_BLKID, tag)) { 380 381 if (ds == dmu_buf_get_user(dbuf)) 382 result = B_TRUE; 383 else 384 dmu_buf_rele(dbuf, tag); 385 } 386 387 return (result); 388} 389 390int 391dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 392 dsl_dataset_t **dsp) 393{ 394 objset_t *mos 
= dp->dp_meta_objset; 395 dmu_buf_t *dbuf; 396 dsl_dataset_t *ds; 397 int err; 398 dmu_object_info_t doi; 399 400 ASSERT(dsl_pool_config_held(dp)); 401 402 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 403 if (err != 0) 404 return (err); 405 406 /* Make sure dsobj has the correct object type. */ 407 dmu_object_info_from_db(dbuf, &doi); 408 if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) { 409 dmu_buf_rele(dbuf, tag); 410 return (SET_ERROR(EINVAL)); 411 } 412 413 ds = dmu_buf_get_user(dbuf); 414 if (ds == NULL) { 415 dsl_dataset_t *winner = NULL; 416 417 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 418 ds->ds_dbuf = dbuf; 419 ds->ds_object = dsobj; 420 421 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 422 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 423 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 424 refcount_create(&ds->ds_longholds); 425 426 bplist_create(&ds->ds_pending_deadlist); 427 dsl_deadlist_open(&ds->ds_deadlist, 428 mos, dsl_dataset_phys(ds)->ds_deadlist_obj); 429 430 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 431 offsetof(dmu_sendarg_t, dsa_link)); 432 433 if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 434 int zaperr = zap_contains(mos, dsobj, 435 DS_FIELD_LARGE_BLOCKS); 436 if (zaperr != ENOENT) { 437 VERIFY0(zaperr); 438 ds->ds_large_blocks = B_TRUE; 439 } 440 } 441 442 if (err == 0) { 443 err = dsl_dir_hold_obj(dp, 444 dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, 445 &ds->ds_dir); 446 } 447 if (err != 0) { 448 mutex_destroy(&ds->ds_lock); 449 mutex_destroy(&ds->ds_opening_lock); 450 mutex_destroy(&ds->ds_sendstream_lock); 451 refcount_destroy(&ds->ds_longholds); 452 bplist_destroy(&ds->ds_pending_deadlist); 453 dsl_deadlist_close(&ds->ds_deadlist); 454 kmem_free(ds, sizeof (dsl_dataset_t)); 455 dmu_buf_rele(dbuf, tag); 456 return (err); 457 } 458 459 if (!dsl_dataset_is_snapshot(ds)) { 460 ds->ds_snapname[0] = '\0'; 461 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 462 err = 
dsl_dataset_hold_obj(dp, 463 dsl_dataset_phys(ds)->ds_prev_snap_obj, 464 ds, &ds->ds_prev); 465 } 466 if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 467 int zaperr = zap_lookup(mos, ds->ds_object, 468 DS_FIELD_BOOKMARK_NAMES, 469 sizeof (ds->ds_bookmarks), 1, 470 &ds->ds_bookmarks); 471 if (zaperr != ENOENT) 472 VERIFY0(zaperr); 473 } 474 } else { 475 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 476 err = dsl_dataset_get_snapname(ds); 477 if (err == 0 && 478 dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { 479 err = zap_count( 480 ds->ds_dir->dd_pool->dp_meta_objset, 481 dsl_dataset_phys(ds)->ds_userrefs_obj, 482 &ds->ds_userrefs); 483 } 484 } 485 486 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 487 err = dsl_prop_get_int_ds(ds, 488 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 489 &ds->ds_reserved); 490 if (err == 0) { 491 err = dsl_prop_get_int_ds(ds, 492 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 493 &ds->ds_quota); 494 } 495 } else { 496 ds->ds_reserved = ds->ds_quota = 0; 497 } 498 499 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, 500 dsl_dataset_evict)) != NULL) { 501 bplist_destroy(&ds->ds_pending_deadlist); 502 dsl_deadlist_close(&ds->ds_deadlist); 503 if (ds->ds_prev) 504 dsl_dataset_rele(ds->ds_prev, ds); 505 dsl_dir_rele(ds->ds_dir, ds); 506 mutex_destroy(&ds->ds_lock); 507 mutex_destroy(&ds->ds_opening_lock); 508 mutex_destroy(&ds->ds_sendstream_lock); 509 refcount_destroy(&ds->ds_longholds); 510 kmem_free(ds, sizeof (dsl_dataset_t)); 511 if (err != 0) { 512 dmu_buf_rele(dbuf, tag); 513 return (err); 514 } 515 ds = winner; 516 } else { 517 ds->ds_fsid_guid = 518 unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid); 519 } 520 } 521 ASSERT3P(ds->ds_dbuf, ==, dbuf); 522 ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data); 523 ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 || 524 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 525 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 526 *dsp = ds; 527 return (0); 528} 529 530int 531dsl_dataset_hold(dsl_pool_t *dp, 
const char *name, 532 void *tag, dsl_dataset_t **dsp) 533{ 534 dsl_dir_t *dd; 535 const char *snapname; 536 uint64_t obj; 537 int err = 0; 538 539 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 540 if (err != 0) 541 return (err); 542 543 ASSERT(dsl_pool_config_held(dp)); 544 obj = dsl_dir_phys(dd)->dd_head_dataset_obj; 545 if (obj != 0) 546 err = dsl_dataset_hold_obj(dp, obj, tag, dsp); 547 else 548 err = SET_ERROR(ENOENT); 549 550 /* we may be looking for a snapshot */ 551 if (err == 0 && snapname != NULL) { 552 dsl_dataset_t *ds; 553 554 if (*snapname++ != '@') { 555 dsl_dataset_rele(*dsp, tag); 556 dsl_dir_rele(dd, FTAG); 557 return (SET_ERROR(ENOENT)); 558 } 559 560 dprintf("looking for snapshot '%s'\n", snapname); 561 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 562 if (err == 0) 563 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 564 dsl_dataset_rele(*dsp, tag); 565 566 if (err == 0) { 567 mutex_enter(&ds->ds_lock); 568 if (ds->ds_snapname[0] == 0) 569 (void) strlcpy(ds->ds_snapname, snapname, 570 sizeof (ds->ds_snapname)); 571 mutex_exit(&ds->ds_lock); 572 *dsp = ds; 573 } 574 } 575 576 dsl_dir_rele(dd, FTAG); 577 return (err); 578} 579 580int 581dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 582 void *tag, dsl_dataset_t **dsp) 583{ 584 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 585 if (err != 0) 586 return (err); 587 if (!dsl_dataset_tryown(*dsp, tag)) { 588 dsl_dataset_rele(*dsp, tag); 589 *dsp = NULL; 590 return (SET_ERROR(EBUSY)); 591 } 592 return (0); 593} 594 595int 596dsl_dataset_own(dsl_pool_t *dp, const char *name, 597 void *tag, dsl_dataset_t **dsp) 598{ 599 int err = dsl_dataset_hold(dp, name, tag, dsp); 600 if (err != 0) 601 return (err); 602 if (!dsl_dataset_tryown(*dsp, tag)) { 603 dsl_dataset_rele(*dsp, tag); 604 return (SET_ERROR(EBUSY)); 605 } 606 return (0); 607} 608 609/* 610 * See the comment above dsl_pool_hold() for details. 
In summary, a long 611 * hold is used to prevent destruction of a dataset while the pool hold 612 * is dropped, allowing other concurrent operations (e.g. spa_sync()). 613 * 614 * The dataset and pool must be held when this function is called. After it 615 * is called, the pool hold may be released while the dataset is still held 616 * and accessed. 617 */ 618void 619dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 620{ 621 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 622 (void) refcount_add(&ds->ds_longholds, tag); 623} 624 625void 626dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 627{ 628 (void) refcount_remove(&ds->ds_longholds, tag); 629} 630 631/* Return B_TRUE if there are any long holds on this dataset. */ 632boolean_t 633dsl_dataset_long_held(dsl_dataset_t *ds) 634{ 635 return (!refcount_is_zero(&ds->ds_longholds)); 636} 637 638void 639dsl_dataset_name(dsl_dataset_t *ds, char *name) 640{ 641 if (ds == NULL) { 642 (void) strcpy(name, "mos"); 643 } else { 644 dsl_dir_name(ds->ds_dir, name); 645 VERIFY0(dsl_dataset_get_snapname(ds)); 646 if (ds->ds_snapname[0]) { 647 (void) strcat(name, "@"); 648 /* 649 * We use a "recursive" mutex so that we 650 * can call dprintf_ds() with ds_lock held. 
651 */ 652 if (!MUTEX_HELD(&ds->ds_lock)) { 653 mutex_enter(&ds->ds_lock); 654 (void) strcat(name, ds->ds_snapname); 655 mutex_exit(&ds->ds_lock); 656 } else { 657 (void) strcat(name, ds->ds_snapname); 658 } 659 } 660 } 661} 662 663void 664dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 665{ 666 dmu_buf_rele(ds->ds_dbuf, tag); 667} 668 669void 670dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 671{ 672 ASSERT3P(ds->ds_owner, ==, tag); 673 ASSERT(ds->ds_dbuf != NULL); 674 675 mutex_enter(&ds->ds_lock); 676 ds->ds_owner = NULL; 677 mutex_exit(&ds->ds_lock); 678 dsl_dataset_long_rele(ds, tag); 679 dsl_dataset_rele(ds, tag); 680} 681 682boolean_t 683dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 684{ 685 boolean_t gotit = FALSE; 686 687 mutex_enter(&ds->ds_lock); 688 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 689 ds->ds_owner = tag; 690 dsl_dataset_long_hold(ds, tag); 691 gotit = TRUE; 692 } 693 mutex_exit(&ds->ds_lock); 694 return (gotit); 695} 696 697uint64_t 698dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 699 uint64_t flags, dmu_tx_t *tx) 700{ 701 dsl_pool_t *dp = dd->dd_pool; 702 dmu_buf_t *dbuf; 703 dsl_dataset_phys_t *dsphys; 704 uint64_t dsobj; 705 objset_t *mos = dp->dp_meta_objset; 706 707 if (origin == NULL) 708 origin = dp->dp_origin_snap; 709 710 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 711 ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0); 712 ASSERT(dmu_tx_is_syncing(tx)); 713 ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0); 714 715 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 716 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 717 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 718 dmu_buf_will_dirty(dbuf, tx); 719 dsphys = dbuf->db_data; 720 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 721 dsphys->ds_dir_obj = dd->dd_object; 722 dsphys->ds_flags = flags; 723 dsphys->ds_fsid_guid = unique_create(); 724 do { 725 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 
726 sizeof (dsphys->ds_guid)); 727 } while (dsphys->ds_guid == 0); 728 dsphys->ds_snapnames_zapobj = 729 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 730 DMU_OT_NONE, 0, tx); 731 dsphys->ds_creation_time = gethrestime_sec(); 732 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 733 734 if (origin == NULL) { 735 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 736 } else { 737 dsl_dataset_t *ohds; /* head of the origin snapshot */ 738 739 dsphys->ds_prev_snap_obj = origin->ds_object; 740 dsphys->ds_prev_snap_txg = 741 dsl_dataset_phys(origin)->ds_creation_txg; 742 dsphys->ds_referenced_bytes = 743 dsl_dataset_phys(origin)->ds_referenced_bytes; 744 dsphys->ds_compressed_bytes = 745 dsl_dataset_phys(origin)->ds_compressed_bytes; 746 dsphys->ds_uncompressed_bytes = 747 dsl_dataset_phys(origin)->ds_uncompressed_bytes; 748 dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; 749 750 /* 751 * Inherit flags that describe the dataset's contents 752 * (INCONSISTENT) or properties (Case Insensitive). 
753 */ 754 dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags & 755 (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); 756 757 if (origin->ds_large_blocks) 758 dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); 759 760 dmu_buf_will_dirty(origin->ds_dbuf, tx); 761 dsl_dataset_phys(origin)->ds_num_children++; 762 763 VERIFY0(dsl_dataset_hold_obj(dp, 764 dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj, 765 FTAG, &ohds)); 766 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 767 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 768 dsl_dataset_rele(ohds, FTAG); 769 770 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 771 if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) { 772 dsl_dataset_phys(origin)->ds_next_clones_obj = 773 zap_create(mos, 774 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 775 } 776 VERIFY0(zap_add_int(mos, 777 dsl_dataset_phys(origin)->ds_next_clones_obj, 778 dsobj, tx)); 779 } 780 781 dmu_buf_will_dirty(dd->dd_dbuf, tx); 782 dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object; 783 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 784 if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { 785 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 786 dsl_dir_phys(origin->ds_dir)->dd_clones = 787 zap_create(mos, 788 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 789 } 790 VERIFY0(zap_add_int(mos, 791 dsl_dir_phys(origin->ds_dir)->dd_clones, 792 dsobj, tx)); 793 } 794 } 795 796 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 797 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 798 799 dmu_buf_rele(dbuf, FTAG); 800 801 dmu_buf_will_dirty(dd->dd_dbuf, tx); 802 dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj; 803 804 return (dsobj); 805} 806 807static void 808dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 809{ 810 objset_t *os; 811 812 VERIFY0(dmu_objset_from_ds(ds, &os)); 813 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 814 dsl_dataset_dirty(ds, tx); 815} 816 817uint64_t 
818dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 819 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 820{ 821 dsl_pool_t *dp = pdd->dd_pool; 822 uint64_t dsobj, ddobj; 823 dsl_dir_t *dd; 824 825 ASSERT(dmu_tx_is_syncing(tx)); 826 ASSERT(lastname[0] != '@'); 827 828 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 829 VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 830 831 dsobj = dsl_dataset_create_sync_dd(dd, origin, 832 flags & ~DS_CREATE_FLAG_NODIRTY, tx); 833 834 dsl_deleg_set_create_perms(dd, tx, cr); 835 836 /* 837 * Since we're creating a new node we know it's a leaf, so we can 838 * initialize the counts if the limit feature is active. 839 */ 840 if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 841 uint64_t cnt = 0; 842 objset_t *os = dd->dd_pool->dp_meta_objset; 843 844 dsl_dir_zapify(dd, tx); 845 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 846 sizeof (cnt), 1, &cnt, tx)); 847 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 848 sizeof (cnt), 1, &cnt, tx)); 849 } 850 851 dsl_dir_rele(dd, FTAG); 852 853 /* 854 * If we are creating a clone, make sure we zero out any stale 855 * data from the origin snapshots zil header. 
856 */ 857 if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 858 dsl_dataset_t *ds; 859 860 VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 861 dsl_dataset_zero_zil(ds, tx); 862 dsl_dataset_rele(ds, FTAG); 863 } 864 865 return (dsobj); 866} 867 868#ifdef __FreeBSD__ 869/* FreeBSD ioctl compat begin */ 870struct destroyarg { 871 nvlist_t *nvl; 872 const char *snapname; 873}; 874 875static int 876dsl_check_snap_cb(const char *name, void *arg) 877{ 878 struct destroyarg *da = arg; 879 dsl_dataset_t *ds; 880 char *dsname; 881 882 dsname = kmem_asprintf("%s@%s", name, da->snapname); 883 fnvlist_add_boolean(da->nvl, dsname); 884 kmem_free(dsname, strlen(dsname) + 1); 885 886 return (0); 887} 888 889int 890dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, 891 nvlist_t *snaps) 892{ 893 struct destroyarg *da; 894 int err; 895 896 da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); 897 da->nvl = snaps; 898 da->snapname = snapname; 899 err = dmu_objset_find(fsname, dsl_check_snap_cb, da, 900 DS_FIND_CHILDREN); 901 kmem_free(da, sizeof (struct destroyarg)); 902 903 return (err); 904} 905/* FreeBSD ioctl compat end */ 906#endif /* __FreeBSD__ */ 907 908/* 909 * The unique space in the head dataset can be calculated by subtracting 910 * the space used in the most recent snapshot, that is still being used 911 * in this file system, from the space currently in use. To figure out 912 * the space in the most recent snapshot still in use, we need to take 913 * the total space used in the snapshot and subtract out the space that 914 * has been freed up since the snapshot was taken. 
915 */ 916void 917dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 918{ 919 uint64_t mrs_used; 920 uint64_t dlused, dlcomp, dluncomp; 921 922 ASSERT(!dsl_dataset_is_snapshot(ds)); 923 924 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) 925 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; 926 else 927 mrs_used = 0; 928 929 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 930 931 ASSERT3U(dlused, <=, mrs_used); 932 dsl_dataset_phys(ds)->ds_unique_bytes = 933 dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused); 934 935 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 936 SPA_VERSION_UNIQUE_ACCURATE) 937 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 938} 939 940void 941dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, 942 dmu_tx_t *tx) 943{ 944 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 945 uint64_t count; 946 int err; 947 948 ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2); 949 err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 950 obj, tx); 951 /* 952 * The err should not be ENOENT, but a bug in a previous version 953 * of the code could cause upgrade_clones_cb() to not set 954 * ds_next_snap_obj when it should, leading to a missing entry. 955 * If we knew that the pool was created after 956 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 957 * ENOENT. However, at least we can check that we don't have 958 * too many entries in the next_clones_obj even after failing to 959 * remove this one. 
960 */ 961 if (err != ENOENT) 962 VERIFY0(err); 963 ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 964 &count)); 965 ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2); 966} 967 968 969blkptr_t * 970dsl_dataset_get_blkptr(dsl_dataset_t *ds) 971{ 972 return (&dsl_dataset_phys(ds)->ds_bp); 973} 974 975void 976dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 977{ 978 ASSERT(dmu_tx_is_syncing(tx)); 979 /* If it's the meta-objset, set dp_meta_rootbp */ 980 if (ds == NULL) { 981 tx->tx_pool->dp_meta_rootbp = *bp; 982 } else { 983 dmu_buf_will_dirty(ds->ds_dbuf, tx); 984 dsl_dataset_phys(ds)->ds_bp = *bp; 985 } 986} 987 988spa_t * 989dsl_dataset_get_spa(dsl_dataset_t *ds) 990{ 991 return (ds->ds_dir->dd_pool->dp_spa); 992} 993 994void 995dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 996{ 997 dsl_pool_t *dp; 998 999 if (ds == NULL) /* this is the meta-objset */ 1000 return; 1001 1002 ASSERT(ds->ds_objset != NULL); 1003 1004 if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) 1005 panic("dirtying snapshot!"); 1006 1007 dp = ds->ds_dir->dd_pool; 1008 1009 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { 1010 /* up the hold count until we can be written out */ 1011 dmu_buf_add_ref(ds->ds_dbuf, ds); 1012 } 1013} 1014 1015boolean_t 1016dsl_dataset_is_dirty(dsl_dataset_t *ds) 1017{ 1018 for (int t = 0; t < TXG_SIZE; t++) { 1019 if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 1020 ds, t)) 1021 return (B_TRUE); 1022 } 1023 return (B_FALSE); 1024} 1025 1026static int 1027dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1028{ 1029 uint64_t asize; 1030 1031 if (!dmu_tx_is_syncing(tx)) 1032 return (0); 1033 1034 /* 1035 * If there's an fs-only reservation, any blocks that might become 1036 * owned by the snapshot dataset must be accommodated by space 1037 * outside of the reservation. 
1038 */ 1039 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 1040 asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved); 1041 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 1042 return (SET_ERROR(ENOSPC)); 1043 1044 /* 1045 * Propagate any reserved space for this snapshot to other 1046 * snapshot checks in this sync group. 1047 */ 1048 if (asize > 0) 1049 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1050 1051 return (0); 1052} 1053 1054typedef struct dsl_dataset_snapshot_arg { 1055 nvlist_t *ddsa_snaps; 1056 nvlist_t *ddsa_props; 1057 nvlist_t *ddsa_errors; 1058 cred_t *ddsa_cr; 1059} dsl_dataset_snapshot_arg_t; 1060 1061int 1062dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, 1063 dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr) 1064{ 1065 int error; 1066 uint64_t value; 1067 1068 ds->ds_trysnap_txg = tx->tx_txg; 1069 1070 if (!dmu_tx_is_syncing(tx)) 1071 return (0); 1072 1073 /* 1074 * We don't allow multiple snapshots of the same txg. If there 1075 * is already one, try again. 1076 */ 1077 if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) 1078 return (SET_ERROR(EAGAIN)); 1079 1080 /* 1081 * Check for conflicting snapshot name. 1082 */ 1083 error = dsl_dataset_snap_lookup(ds, snapname, &value); 1084 if (error == 0) 1085 return (SET_ERROR(EEXIST)); 1086 if (error != ENOENT) 1087 return (error); 1088 1089 /* 1090 * We don't allow taking snapshots of inconsistent datasets, such as 1091 * those into which we are currently receiving. However, if we are 1092 * creating this snapshot as part of a receive, this check will be 1093 * executed atomically with respect to the completion of the receive 1094 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this 1095 * case we ignore this, knowing it will be fixed up for us shortly in 1096 * dmu_recv_end_sync(). 
1097 */ 1098 if (!recv && DS_IS_INCONSISTENT(ds)) 1099 return (SET_ERROR(EBUSY)); 1100 1101 /* 1102 * Skip the check for temporary snapshots or if we have already checked 1103 * the counts in dsl_dataset_snapshot_check. This means we really only 1104 * check the count here when we're receiving a stream. 1105 */ 1106 if (cnt != 0 && cr != NULL) { 1107 error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1108 ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr); 1109 if (error != 0) 1110 return (error); 1111 } 1112 1113 error = dsl_dataset_snapshot_reserve_space(ds, tx); 1114 if (error != 0) 1115 return (error); 1116 1117 return (0); 1118} 1119 1120static int 1121dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) 1122{ 1123 dsl_dataset_snapshot_arg_t *ddsa = arg; 1124 dsl_pool_t *dp = dmu_tx_pool(tx); 1125 nvpair_t *pair; 1126 int rv = 0; 1127 1128 /* 1129 * Pre-compute how many total new snapshots will be created for each 1130 * level in the tree and below. This is needed for validating the 1131 * snapshot limit when either taking a recursive snapshot or when 1132 * taking multiple snapshots. 1133 * 1134 * The problem is that the counts are not actually adjusted when 1135 * we are checking, only when we finally sync. For a single snapshot, 1136 * this is easy, the count will increase by 1 at each node up the tree, 1137 * but its more complicated for the recursive/multiple snapshot case. 1138 * 1139 * The dsl_fs_ss_limit_check function does recursively check the count 1140 * at each level up the tree but since it is validating each snapshot 1141 * independently we need to be sure that we are validating the complete 1142 * count for the entire set of snapshots. We do this by rolling up the 1143 * counts for each component of the name into an nvlist and then 1144 * checking each of those cases with the aggregated count. 
1145 * 1146 * This approach properly handles not only the recursive snapshot 1147 * case (where we get all of those on the ddsa_snaps list) but also 1148 * the sibling case (e.g. snapshot a/b and a/c so that we will also 1149 * validate the limit on 'a' using a count of 2). 1150 * 1151 * We validate the snapshot names in the third loop and only report 1152 * name errors once. 1153 */ 1154 if (dmu_tx_is_syncing(tx)) { 1155 nvlist_t *cnt_track = NULL; 1156 cnt_track = fnvlist_alloc(); 1157 1158 /* Rollup aggregated counts into the cnt_track list */ 1159 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1160 pair != NULL; 1161 pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1162 char *pdelim; 1163 uint64_t val; 1164 char nm[MAXPATHLEN]; 1165 1166 (void) strlcpy(nm, nvpair_name(pair), sizeof (nm)); 1167 pdelim = strchr(nm, '@'); 1168 if (pdelim == NULL) 1169 continue; 1170 *pdelim = '\0'; 1171 1172 do { 1173 if (nvlist_lookup_uint64(cnt_track, nm, 1174 &val) == 0) { 1175 /* update existing entry */ 1176 fnvlist_add_uint64(cnt_track, nm, 1177 val + 1); 1178 } else { 1179 /* add to list */ 1180 fnvlist_add_uint64(cnt_track, nm, 1); 1181 } 1182 1183 pdelim = strrchr(nm, '/'); 1184 if (pdelim != NULL) 1185 *pdelim = '\0'; 1186 } while (pdelim != NULL); 1187 } 1188 1189 /* Check aggregated counts at each level */ 1190 for (pair = nvlist_next_nvpair(cnt_track, NULL); 1191 pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) { 1192 int error = 0; 1193 char *name; 1194 uint64_t cnt = 0; 1195 dsl_dataset_t *ds; 1196 1197 name = nvpair_name(pair); 1198 cnt = fnvpair_value_uint64(pair); 1199 ASSERT(cnt > 0); 1200 1201 error = dsl_dataset_hold(dp, name, FTAG, &ds); 1202 if (error == 0) { 1203 error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1204 ZFS_PROP_SNAPSHOT_LIMIT, NULL, 1205 ddsa->ddsa_cr); 1206 dsl_dataset_rele(ds, FTAG); 1207 } 1208 1209 if (error != 0) { 1210 if (ddsa->ddsa_errors != NULL) 1211 fnvlist_add_int32(ddsa->ddsa_errors, 1212 name, error); 1213 rv 
= error; 1214 /* only report one error for this check */ 1215 break; 1216 } 1217 } 1218 nvlist_free(cnt_track); 1219 } 1220 1221 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1222 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1223 int error = 0; 1224 dsl_dataset_t *ds; 1225 char *name, *atp; 1226 char dsname[MAXNAMELEN]; 1227 1228 name = nvpair_name(pair); 1229 if (strlen(name) >= MAXNAMELEN) 1230 error = SET_ERROR(ENAMETOOLONG); 1231 if (error == 0) { 1232 atp = strchr(name, '@'); 1233 if (atp == NULL) 1234 error = SET_ERROR(EINVAL); 1235 if (error == 0) 1236 (void) strlcpy(dsname, name, atp - name + 1); 1237 } 1238 if (error == 0) 1239 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 1240 if (error == 0) { 1241 /* passing 0/NULL skips dsl_fs_ss_limit_check */ 1242 error = dsl_dataset_snapshot_check_impl(ds, 1243 atp + 1, tx, B_FALSE, 0, NULL); 1244 dsl_dataset_rele(ds, FTAG); 1245 } 1246 1247 if (error != 0) { 1248 if (ddsa->ddsa_errors != NULL) { 1249 fnvlist_add_int32(ddsa->ddsa_errors, 1250 name, error); 1251 } 1252 rv = error; 1253 } 1254 } 1255 1256 return (rv); 1257} 1258 1259void 1260dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, 1261 dmu_tx_t *tx) 1262{ 1263 static zil_header_t zero_zil; 1264 1265 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1266 dmu_buf_t *dbuf; 1267 dsl_dataset_phys_t *dsphys; 1268 uint64_t dsobj, crtxg; 1269 objset_t *mos = dp->dp_meta_objset; 1270 objset_t *os; 1271 1272 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1273 1274 /* 1275 * If we are on an old pool, the zil must not be active, in which 1276 * case it will be zeroed. Usually zil_suspend() accomplishes this. 
1277 */ 1278 ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || 1279 dmu_objset_from_ds(ds, &os) != 0 || 1280 bcmp(&os->os_phys->os_zil_header, &zero_zil, 1281 sizeof (zero_zil)) == 0); 1282 1283 dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); 1284 1285 /* 1286 * The origin's ds_creation_txg has to be < TXG_INITIAL 1287 */ 1288 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1289 crtxg = 1; 1290 else 1291 crtxg = tx->tx_txg; 1292 1293 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1294 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1295 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1296 dmu_buf_will_dirty(dbuf, tx); 1297 dsphys = dbuf->db_data; 1298 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1299 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1300 dsphys->ds_fsid_guid = unique_create(); 1301 do { 1302 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1303 sizeof (dsphys->ds_guid)); 1304 } while (dsphys->ds_guid == 0); 1305 dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 1306 dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; 1307 dsphys->ds_next_snap_obj = ds->ds_object; 1308 dsphys->ds_num_children = 1; 1309 dsphys->ds_creation_time = gethrestime_sec(); 1310 dsphys->ds_creation_txg = crtxg; 1311 dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; 1312 dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes; 1313 dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes; 1314 dsphys->ds_uncompressed_bytes = 1315 dsl_dataset_phys(ds)->ds_uncompressed_bytes; 1316 dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; 1317 dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; 1318 dmu_buf_rele(dbuf, FTAG); 1319 1320 if (ds->ds_large_blocks) 1321 dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); 1322 1323 ASSERT3U(ds->ds_prev != 0, ==, 1324 dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); 1325 if (ds->ds_prev) { 1326 uint64_t next_clones_obj = 
1327 dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj; 1328 ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1329 ds->ds_object || 1330 dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1); 1331 if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1332 ds->ds_object) { 1333 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1334 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, 1335 dsl_dataset_phys(ds->ds_prev)->ds_creation_txg); 1336 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj; 1337 } else if (next_clones_obj != 0) { 1338 dsl_dataset_remove_from_next_clones(ds->ds_prev, 1339 dsphys->ds_next_snap_obj, tx); 1340 VERIFY0(zap_add_int(mos, 1341 next_clones_obj, dsobj, tx)); 1342 } 1343 } 1344 1345 /* 1346 * If we have a reference-reservation on this dataset, we will 1347 * need to increase the amount of refreservation being charged 1348 * since our unique space is going to zero. 1349 */ 1350 if (ds->ds_reserved) { 1351 int64_t delta; 1352 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 1353 delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, 1354 ds->ds_reserved); 1355 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1356 delta, 0, 0, tx); 1357 } 1358 1359 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1360 dsl_dataset_phys(ds)->ds_deadlist_obj = 1361 dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, 1362 dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); 1363 dsl_deadlist_close(&ds->ds_deadlist); 1364 dsl_deadlist_open(&ds->ds_deadlist, mos, 1365 dsl_dataset_phys(ds)->ds_deadlist_obj); 1366 dsl_deadlist_add_key(&ds->ds_deadlist, 1367 dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); 1368 1369 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg); 1370 dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj; 1371 dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg; 1372 dsl_dataset_phys(ds)->ds_unique_bytes = 0; 1373 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1374 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1375 1376 VERIFY0(zap_add(mos, 
dsl_dataset_phys(ds)->ds_snapnames_zapobj, 1377 snapname, 8, 1, &dsobj, tx)); 1378 1379 if (ds->ds_prev) 1380 dsl_dataset_rele(ds->ds_prev, ds); 1381 VERIFY0(dsl_dataset_hold_obj(dp, 1382 dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); 1383 1384 dsl_scan_ds_snapshotted(ds, tx); 1385 1386 dsl_dir_snap_cmtime_update(ds->ds_dir); 1387 1388 spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); 1389} 1390 1391static void 1392dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) 1393{ 1394 dsl_dataset_snapshot_arg_t *ddsa = arg; 1395 dsl_pool_t *dp = dmu_tx_pool(tx); 1396 nvpair_t *pair; 1397 1398 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1399 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1400 dsl_dataset_t *ds; 1401 char *name, *atp; 1402 char dsname[MAXNAMELEN]; 1403 1404 name = nvpair_name(pair); 1405 atp = strchr(name, '@'); 1406 (void) strlcpy(dsname, name, atp - name + 1); 1407 VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); 1408 1409 dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); 1410 if (ddsa->ddsa_props != NULL) { 1411 dsl_props_set_sync_impl(ds->ds_prev, 1412 ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); 1413 } 1414 dsl_dataset_rele(ds, FTAG); 1415 } 1416} 1417 1418/* 1419 * The snapshots must all be in the same pool. 1420 * All-or-nothing: if there are any failures, nothing will be modified. 
1421 */ 1422int 1423dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) 1424{ 1425 dsl_dataset_snapshot_arg_t ddsa; 1426 nvpair_t *pair; 1427 boolean_t needsuspend; 1428 int error; 1429 spa_t *spa; 1430 char *firstname; 1431 nvlist_t *suspended = NULL; 1432 1433 pair = nvlist_next_nvpair(snaps, NULL); 1434 if (pair == NULL) 1435 return (0); 1436 firstname = nvpair_name(pair); 1437 1438 error = spa_open(firstname, &spa, FTAG); 1439 if (error != 0) 1440 return (error); 1441 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1442 spa_close(spa, FTAG); 1443 1444 if (needsuspend) { 1445 suspended = fnvlist_alloc(); 1446 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1447 pair = nvlist_next_nvpair(snaps, pair)) { 1448 char fsname[MAXNAMELEN]; 1449 char *snapname = nvpair_name(pair); 1450 char *atp; 1451 void *cookie; 1452 1453 atp = strchr(snapname, '@'); 1454 if (atp == NULL) { 1455 error = SET_ERROR(EINVAL); 1456 break; 1457 } 1458 (void) strlcpy(fsname, snapname, atp - snapname + 1); 1459 1460 error = zil_suspend(fsname, &cookie); 1461 if (error != 0) 1462 break; 1463 fnvlist_add_uint64(suspended, fsname, 1464 (uintptr_t)cookie); 1465 } 1466 } 1467 1468 ddsa.ddsa_snaps = snaps; 1469 ddsa.ddsa_props = props; 1470 ddsa.ddsa_errors = errors; 1471 ddsa.ddsa_cr = CRED(); 1472 1473 if (error == 0) { 1474 error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, 1475 dsl_dataset_snapshot_sync, &ddsa, 1476 fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL); 1477 } 1478 1479 if (suspended != NULL) { 1480 for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; 1481 pair = nvlist_next_nvpair(suspended, pair)) { 1482 zil_resume((void *)(uintptr_t) 1483 fnvpair_value_uint64(pair)); 1484 } 1485 fnvlist_free(suspended); 1486 } 1487 1488#ifdef __FreeBSD__ 1489#ifdef _KERNEL 1490 if (error == 0) { 1491 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1492 pair = nvlist_next_nvpair(snaps, pair)) { 1493 char *snapname = 
nvpair_name(pair); 1494 zvol_create_minors(snapname); 1495 } 1496 } 1497#endif 1498#endif 1499 return (error); 1500} 1501 1502typedef struct dsl_dataset_snapshot_tmp_arg { 1503 const char *ddsta_fsname; 1504 const char *ddsta_snapname; 1505 minor_t ddsta_cleanup_minor; 1506 const char *ddsta_htag; 1507} dsl_dataset_snapshot_tmp_arg_t; 1508 1509static int 1510dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) 1511{ 1512 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1513 dsl_pool_t *dp = dmu_tx_pool(tx); 1514 dsl_dataset_t *ds; 1515 int error; 1516 1517 error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); 1518 if (error != 0) 1519 return (error); 1520 1521 /* NULL cred means no limit check for tmp snapshot */ 1522 error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, 1523 tx, B_FALSE, 0, NULL); 1524 if (error != 0) { 1525 dsl_dataset_rele(ds, FTAG); 1526 return (error); 1527 } 1528 1529 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1530 dsl_dataset_rele(ds, FTAG); 1531 return (SET_ERROR(ENOTSUP)); 1532 } 1533 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1534 B_TRUE, tx); 1535 if (error != 0) { 1536 dsl_dataset_rele(ds, FTAG); 1537 return (error); 1538 } 1539 1540 dsl_dataset_rele(ds, FTAG); 1541 return (0); 1542} 1543 1544static void 1545dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1546{ 1547 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1548 dsl_pool_t *dp = dmu_tx_pool(tx); 1549 dsl_dataset_t *ds; 1550 1551 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1552 1553 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1554 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1555 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1556 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1557 1558 dsl_dataset_rele(ds, FTAG); 1559} 1560 1561int 1562dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1563 minor_t cleanup_minor, const char *htag) 1564{ 
1565 dsl_dataset_snapshot_tmp_arg_t ddsta; 1566 int error; 1567 spa_t *spa; 1568 boolean_t needsuspend; 1569 void *cookie; 1570 1571 ddsta.ddsta_fsname = fsname; 1572 ddsta.ddsta_snapname = snapname; 1573 ddsta.ddsta_cleanup_minor = cleanup_minor; 1574 ddsta.ddsta_htag = htag; 1575 1576 error = spa_open(fsname, &spa, FTAG); 1577 if (error != 0) 1578 return (error); 1579 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1580 spa_close(spa, FTAG); 1581 1582 if (needsuspend) { 1583 error = zil_suspend(fsname, &cookie); 1584 if (error != 0) 1585 return (error); 1586 } 1587 1588 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1589 dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED); 1590 1591 if (needsuspend) 1592 zil_resume(cookie); 1593 return (error); 1594} 1595 1596 1597void 1598dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1599{ 1600 ASSERT(dmu_tx_is_syncing(tx)); 1601 ASSERT(ds->ds_objset != NULL); 1602 ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0); 1603 1604 /* 1605 * in case we had to change ds_fsid_guid when we opened it, 1606 * sync it out now. 1607 */ 1608 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1609 dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; 1610 1611 dmu_objset_sync(ds->ds_objset, zio, tx); 1612 1613 if (ds->ds_need_large_blocks && !ds->ds_large_blocks) { 1614 dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx); 1615 ds->ds_large_blocks = B_TRUE; 1616 } 1617} 1618 1619static void 1620get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1621{ 1622 uint64_t count = 0; 1623 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1624 zap_cursor_t zc; 1625 zap_attribute_t za; 1626 nvlist_t *propval = fnvlist_alloc(); 1627 nvlist_t *val = fnvlist_alloc(); 1628 1629 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1630 1631 /* 1632 * There may be missing entries in ds_next_clones_obj 1633 * due to a bug in a previous version of the code. 
1634 * Only trust it if it has the right number of entries. 1635 */ 1636 if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { 1637 VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1638 &count)); 1639 } 1640 if (count != dsl_dataset_phys(ds)->ds_num_children - 1) 1641 goto fail; 1642 for (zap_cursor_init(&zc, mos, 1643 dsl_dataset_phys(ds)->ds_next_clones_obj); 1644 zap_cursor_retrieve(&zc, &za) == 0; 1645 zap_cursor_advance(&zc)) { 1646 dsl_dataset_t *clone; 1647 char buf[ZFS_MAXNAMELEN]; 1648 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1649 za.za_first_integer, FTAG, &clone)); 1650 dsl_dir_name(clone->ds_dir, buf); 1651 fnvlist_add_boolean(val, buf); 1652 dsl_dataset_rele(clone, FTAG); 1653 } 1654 zap_cursor_fini(&zc); 1655 fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1656 fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1657fail: 1658 nvlist_free(val); 1659 nvlist_free(propval); 1660} 1661 1662void 1663dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1664{ 1665 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1666 uint64_t refd, avail, uobjs, aobjs, ratio; 1667 1668 ASSERT(dsl_pool_config_held(dp)); 1669 1670 ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 
100 : 1671 (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / 1672 dsl_dataset_phys(ds)->ds_compressed_bytes); 1673 1674 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1675 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1676 dsl_dataset_phys(ds)->ds_uncompressed_bytes); 1677 1678 if (dsl_dataset_is_snapshot(ds)) { 1679 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1680 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1681 dsl_dataset_phys(ds)->ds_unique_bytes); 1682 get_clones_stat(ds, nv); 1683 } else { 1684 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { 1685 char buf[MAXNAMELEN]; 1686 dsl_dataset_name(ds->ds_prev, buf); 1687 dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); 1688 } 1689 1690 dsl_dir_stats(ds->ds_dir, nv); 1691 } 1692 1693 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1694 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1695 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1696 1697 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1698 dsl_dataset_phys(ds)->ds_creation_time); 1699 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1700 dsl_dataset_phys(ds)->ds_creation_txg); 1701 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1702 ds->ds_quota); 1703 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1704 ds->ds_reserved); 1705 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1706 dsl_dataset_phys(ds)->ds_guid); 1707 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1708 dsl_dataset_phys(ds)->ds_unique_bytes); 1709 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1710 ds->ds_object); 1711 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1712 ds->ds_userrefs); 1713 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1714 DS_IS_DEFER_DESTROY(ds) ? 
1 : 0); 1715 1716 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 1717 uint64_t written, comp, uncomp; 1718 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1719 dsl_dataset_t *prev; 1720 1721 int err = dsl_dataset_hold_obj(dp, 1722 dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); 1723 if (err == 0) { 1724 err = dsl_dataset_space_written(prev, ds, &written, 1725 &comp, &uncomp); 1726 dsl_dataset_rele(prev, FTAG); 1727 if (err == 0) { 1728 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1729 written); 1730 } 1731 } 1732 } 1733} 1734 1735void 1736dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1737{ 1738 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1739 ASSERT(dsl_pool_config_held(dp)); 1740 1741 stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg; 1742 stat->dds_inconsistent = 1743 dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; 1744 stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; 1745 stat->dds_origin[0] = '\0'; 1746 if (dsl_dataset_is_snapshot(ds)) { 1747 stat->dds_is_snapshot = B_TRUE; 1748 stat->dds_num_clones = 1749 dsl_dataset_phys(ds)->ds_num_children - 1; 1750 } else { 1751 stat->dds_is_snapshot = B_FALSE; 1752 stat->dds_num_clones = 0; 1753 1754 if (dsl_dir_is_clone(ds->ds_dir)) { 1755 dsl_dataset_t *ods; 1756 1757 VERIFY0(dsl_dataset_hold_obj(dp, 1758 dsl_dir_phys(ds->ds_dir)->dd_origin_obj, 1759 FTAG, &ods)); 1760 dsl_dataset_name(ods, stat->dds_origin); 1761 dsl_dataset_rele(ods, FTAG); 1762 } 1763 } 1764} 1765 1766uint64_t 1767dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1768{ 1769 return (ds->ds_fsid_guid); 1770} 1771 1772void 1773dsl_dataset_space(dsl_dataset_t *ds, 1774 uint64_t *refdbytesp, uint64_t *availbytesp, 1775 uint64_t *usedobjsp, uint64_t *availobjsp) 1776{ 1777 *refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes; 1778 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1779 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) 1780 *availbytesp += 1781 ds->ds_reserved - 
dsl_dataset_phys(ds)->ds_unique_bytes; 1782 if (ds->ds_quota != 0) { 1783 /* 1784 * Adjust available bytes according to refquota 1785 */ 1786 if (*refdbytesp < ds->ds_quota) 1787 *availbytesp = MIN(*availbytesp, 1788 ds->ds_quota - *refdbytesp); 1789 else 1790 *availbytesp = 0; 1791 } 1792 *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); 1793 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1794} 1795 1796boolean_t 1797dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) 1798{ 1799 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1800 1801 ASSERT(dsl_pool_config_held(dp)); 1802 if (snap == NULL) 1803 return (B_FALSE); 1804 if (dsl_dataset_phys(ds)->ds_bp.blk_birth > 1805 dsl_dataset_phys(snap)->ds_creation_txg) { 1806 objset_t *os, *os_snap; 1807 /* 1808 * It may be that only the ZIL differs, because it was 1809 * reset in the head. Don't count that as being 1810 * modified. 1811 */ 1812 if (dmu_objset_from_ds(ds, &os) != 0) 1813 return (B_TRUE); 1814 if (dmu_objset_from_ds(snap, &os_snap) != 0) 1815 return (B_TRUE); 1816 return (bcmp(&os->os_phys->os_meta_dnode, 1817 &os_snap->os_phys->os_meta_dnode, 1818 sizeof (os->os_phys->os_meta_dnode)) != 0); 1819 } 1820 return (B_FALSE); 1821} 1822 1823typedef struct dsl_dataset_rename_snapshot_arg { 1824 const char *ddrsa_fsname; 1825 const char *ddrsa_oldsnapname; 1826 const char *ddrsa_newsnapname; 1827 boolean_t ddrsa_recursive; 1828 dmu_tx_t *ddrsa_tx; 1829} dsl_dataset_rename_snapshot_arg_t; 1830 1831/* ARGSUSED */ 1832static int 1833dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 1834 dsl_dataset_t *hds, void *arg) 1835{ 1836 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1837 int error; 1838 uint64_t val; 1839 1840 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1841 if (error != 0) { 1842 /* ignore nonexistent snapshots */ 1843 return (error == ENOENT ? 
0 : error); 1844 } 1845 1846 /* new name should not exist */ 1847 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 1848 if (error == 0) 1849 error = SET_ERROR(EEXIST); 1850 else if (error == ENOENT) 1851 error = 0; 1852 1853 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1854 if (dsl_dir_namelen(hds->ds_dir) + 1 + 1855 strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) 1856 error = SET_ERROR(ENAMETOOLONG); 1857 1858 return (error); 1859} 1860 1861static int 1862dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 1863{ 1864 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1865 dsl_pool_t *dp = dmu_tx_pool(tx); 1866 dsl_dataset_t *hds; 1867 int error; 1868 1869 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 1870 if (error != 0) 1871 return (error); 1872 1873 if (ddrsa->ddrsa_recursive) { 1874 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1875 dsl_dataset_rename_snapshot_check_impl, ddrsa, 1876 DS_FIND_CHILDREN); 1877 } else { 1878 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 1879 } 1880 dsl_dataset_rele(hds, FTAG); 1881 return (error); 1882} 1883 1884static int 1885dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 1886 dsl_dataset_t *hds, void *arg) 1887{ 1888#ifdef __FreeBSD__ 1889#ifdef _KERNEL 1890 char *oldname, *newname; 1891#endif 1892#endif 1893 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1894 dsl_dataset_t *ds; 1895 uint64_t val; 1896 dmu_tx_t *tx = ddrsa->ddrsa_tx; 1897 int error; 1898 1899 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1900 ASSERT(error == 0 || error == ENOENT); 1901 if (error == ENOENT) { 1902 /* ignore nonexistent snapshots */ 1903 return (0); 1904 } 1905 1906 VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 1907 1908 /* log before we change the name */ 1909 spa_history_log_internal_ds(ds, "rename", tx, 1910 "-> @%s", ddrsa->ddrsa_newsnapname); 1911 1912 VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, 
tx, 1913 B_FALSE)); 1914 mutex_enter(&ds->ds_lock); 1915 (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 1916 mutex_exit(&ds->ds_lock); 1917 VERIFY0(zap_add(dp->dp_meta_objset, 1918 dsl_dataset_phys(hds)->ds_snapnames_zapobj, 1919 ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 1920 1921#ifdef __FreeBSD__ 1922#ifdef _KERNEL 1923 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1924 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1925 snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1926 ddrsa->ddrsa_oldsnapname); 1927 snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1928 ddrsa->ddrsa_newsnapname); 1929 zfsvfs_update_fromname(oldname, newname); 1930 zvol_rename_minors(oldname, newname); 1931 kmem_free(newname, MAXPATHLEN); 1932 kmem_free(oldname, MAXPATHLEN); 1933#endif 1934#endif 1935 dsl_dataset_rele(ds, FTAG); 1936 1937 return (0); 1938} 1939 1940static void 1941dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 1942{ 1943 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1944 dsl_pool_t *dp = dmu_tx_pool(tx); 1945 dsl_dataset_t *hds; 1946 1947 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 1948 ddrsa->ddrsa_tx = tx; 1949 if (ddrsa->ddrsa_recursive) { 1950 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1951 dsl_dataset_rename_snapshot_sync_impl, ddrsa, 1952 DS_FIND_CHILDREN)); 1953 } else { 1954 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 1955 } 1956 dsl_dataset_rele(hds, FTAG); 1957} 1958 1959int 1960dsl_dataset_rename_snapshot(const char *fsname, 1961 const char *oldsnapname, const char *newsnapname, boolean_t recursive) 1962{ 1963 dsl_dataset_rename_snapshot_arg_t ddrsa; 1964 1965 ddrsa.ddrsa_fsname = fsname; 1966 ddrsa.ddrsa_oldsnapname = oldsnapname; 1967 ddrsa.ddrsa_newsnapname = newsnapname; 1968 ddrsa.ddrsa_recursive = recursive; 1969 1970 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 1971 dsl_dataset_rename_snapshot_sync, &ddrsa, 1972 1, ZFS_SPACE_CHECK_RESERVED)); 
1973} 1974 1975/* 1976 * If we're doing an ownership handoff, we need to make sure that there is 1977 * only one long hold on the dataset. We're not allowed to change anything here 1978 * so we don't permanently release the long hold or regular hold here. We want 1979 * to do this only when syncing to avoid the dataset unexpectedly going away 1980 * when we release the long hold. 1981 */ 1982static int 1983dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1984{ 1985 boolean_t held; 1986 1987 if (!dmu_tx_is_syncing(tx)) 1988 return (0); 1989 1990 if (owner != NULL) { 1991 VERIFY3P(ds->ds_owner, ==, owner); 1992 dsl_dataset_long_rele(ds, owner); 1993 } 1994 1995 held = dsl_dataset_long_held(ds); 1996 1997 if (owner != NULL) 1998 dsl_dataset_long_hold(ds, owner); 1999 2000 if (held) 2001 return (SET_ERROR(EBUSY)); 2002 2003 return (0); 2004} 2005 2006typedef struct dsl_dataset_rollback_arg { 2007 const char *ddra_fsname; 2008 void *ddra_owner; 2009 nvlist_t *ddra_result; 2010} dsl_dataset_rollback_arg_t; 2011 2012static int 2013dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 2014{ 2015 dsl_dataset_rollback_arg_t *ddra = arg; 2016 dsl_pool_t *dp = dmu_tx_pool(tx); 2017 dsl_dataset_t *ds; 2018 int64_t unused_refres_delta; 2019 int error; 2020 2021 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 2022 if (error != 0) 2023 return (error); 2024 2025 /* must not be a snapshot */ 2026 if (dsl_dataset_is_snapshot(ds)) { 2027 dsl_dataset_rele(ds, FTAG); 2028 return (SET_ERROR(EINVAL)); 2029 } 2030 2031 /* must have a most recent snapshot */ 2032 if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) { 2033 dsl_dataset_rele(ds, FTAG); 2034 return (SET_ERROR(EINVAL)); 2035 } 2036 2037 /* must not have any bookmarks after the most recent snapshot */ 2038 nvlist_t *proprequest = fnvlist_alloc(); 2039 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2040 nvlist_t *bookmarks = fnvlist_alloc(); 2041 error = 
dsl_get_bookmarks_impl(ds, proprequest, bookmarks); 2042 fnvlist_free(proprequest); 2043 if (error != 0) 2044 return (error); 2045 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL); 2046 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { 2047 nvlist_t *valuenv = 2048 fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), 2049 zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2050 uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); 2051 if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 2052 fnvlist_free(bookmarks); 2053 dsl_dataset_rele(ds, FTAG); 2054 return (SET_ERROR(EEXIST)); 2055 } 2056 } 2057 fnvlist_free(bookmarks); 2058 2059 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 2060 if (error != 0) { 2061 dsl_dataset_rele(ds, FTAG); 2062 return (error); 2063 } 2064 2065 /* 2066 * Check if the snap we are rolling back to uses more than 2067 * the refquota. 2068 */ 2069 if (ds->ds_quota != 0 && 2070 dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) { 2071 dsl_dataset_rele(ds, FTAG); 2072 return (SET_ERROR(EDQUOT)); 2073 } 2074 2075 /* 2076 * When we do the clone swap, we will temporarily use more space 2077 * due to the refreservation (the head will no longer have any 2078 * unique space, so the entire amount of the refreservation will need 2079 * to be free). We will immediately destroy the clone, freeing 2080 * this space, but the freeing happens over many txg's. 
2081 */ 2082 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 2083 dsl_dataset_phys(ds)->ds_unique_bytes); 2084 2085 if (unused_refres_delta > 0 && 2086 unused_refres_delta > 2087 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 2088 dsl_dataset_rele(ds, FTAG); 2089 return (SET_ERROR(ENOSPC)); 2090 } 2091 2092 dsl_dataset_rele(ds, FTAG); 2093 return (0); 2094} 2095 2096static void 2097dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 2098{ 2099 dsl_dataset_rollback_arg_t *ddra = arg; 2100 dsl_pool_t *dp = dmu_tx_pool(tx); 2101 dsl_dataset_t *ds, *clone; 2102 uint64_t cloneobj; 2103 char namebuf[ZFS_MAXNAMELEN]; 2104 2105 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 2106 2107 dsl_dataset_name(ds->ds_prev, namebuf); 2108 fnvlist_add_string(ddra->ddra_result, "target", namebuf); 2109 2110 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 2111 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 2112 2113 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 2114 2115 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 2116 dsl_dataset_zero_zil(ds, tx); 2117 2118 dsl_destroy_head_sync_impl(clone, tx); 2119 2120 dsl_dataset_rele(clone, FTAG); 2121 dsl_dataset_rele(ds, FTAG); 2122} 2123 2124/* 2125 * Rolls back the given filesystem or volume to the most recent snapshot. 2126 * The name of the most recent snapshot will be returned under key "target" 2127 * in the result nvlist. 2128 * 2129 * If owner != NULL: 2130 * - The existing dataset MUST be owned by the specified owner at entry 2131 * - Upon return, dataset will still be held by the same owner, whether we 2132 * succeed or not. 2133 * 2134 * This mode is required any time the existing filesystem is mounted. See 2135 * notes above zfs_suspend_fs() for further details. 
/*
 * Rolls back the given filesystem or volume to the most recent snapshot.
 * The name of the most recent snapshot will be returned under key "target"
 * in the result nvlist.
 *
 * If owner != NULL:
 * - The existing dataset MUST be owned by the specified owner at entry
 * - Upon return, dataset will still be held by the same owner, whether we
 *   succeed or not.
 *
 * This mode is required any time the existing filesystem is mounted.  See
 * notes above zfs_suspend_fs() for further details.
 *
 * Returns whatever dsl_sync_task() returns for the
 * dsl_dataset_rollback_check / dsl_dataset_rollback_sync pair.
 */
int
dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result)
{
	dsl_dataset_rollback_arg_t ddra;

	/* Bundle the caller's arguments for the check and sync callbacks. */
	ddra.ddra_fsname = fsname;
	ddra.ddra_owner = owner;
	ddra.ddra_result = result;

	return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
	    dsl_dataset_rollback_sync, &ddra,
	    1, ZFS_SPACE_CHECK_RESERVED));
}

/*
 * One entry of a snapshot list built by snaplist_make(): a held dataset
 * plus the linkage that threads it onto a list_t.
 */
struct promotenode {
	list_node_t link;
	dsl_dataset_t *ds;
};

/*
 * State shared between dsl_dataset_promote(), its check and sync
 * callbacks, and promote_hold()/promote_rele().
 */
typedef struct dsl_dataset_promote_arg {
	const char *ddpa_clonename;	/* name of the clone being promoted */
	dsl_dataset_t *ddpa_clone;	/* held by promote_hold() */
	/* snapshot lists populated by promote_hold() via snaplist_make() */
	list_t shared_snaps, origin_snaps, clone_snaps;
	dsl_dataset_t *origin_origin; /* origin of the origin */
	/* space figures computed by the check, consumed by the sync */
	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
	char *err_ds;			/* out: conflicting snapshot name */
	cred_t *cr;
} dsl_dataset_promote_arg_t;

static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,
    void *tag);
static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag);

/*
 * Check callback for promoting a clone.
 *
 * Holds everything via promote_hold(), rejects clones flagged
 * DS_FLAG_NOPROMOTE (EXDEV), and, in syncing context only, verifies each
 * snapshot that would move (not long-held, name fits, no name conflict in
 * the clone) while computing the space figures stored in *ddpa for
 * dsl_dataset_promote_sync().  On a snapshot name conflict the offending
 * name is copied to ddpa->err_ds and EEXIST is returned.
 *
 * All holds taken here are dropped again before returning.
 */
static int
dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_promote_arg_t *ddpa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	struct promotenode *snap;
	dsl_dataset_t *origin_ds;
	int err;
	uint64_t unused;
	uint64_t ss_mv_cnt;
	size_t max_snap_len;

	err = promote_hold(ddpa, dp, FTAG);
	if (err != 0)
		return (err);

	hds = ddpa->ddpa_clone;
	/* Longest snapshot name that still fits after the clone's name. */
	max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1;

	if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) {
		promote_rele(ddpa, FTAG);
		return (SET_ERROR(EXDEV));
	}

	/*
	 * Compute and check the amount of space to transfer.  Since this is
	 * so expensive, don't do the preliminary check.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		promote_rele(ddpa, FTAG);
		return (0);
	}

	/* The head of shared_snaps is the clone's origin snapshot. */
	snap = list_head(&ddpa->shared_snaps);
	origin_ds = snap->ds;

	/* compute origin's new unique space */
	snap = list_tail(&ddpa->clone_snaps);
	ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
	    origin_ds->ds_object);
	dsl_deadlist_space_range(&snap->ds->ds_deadlist,
	    dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX,
	    &ddpa->unique, &unused, &unused);

	/*
	 * Walk the snapshots that we are moving
	 *
	 * Compute space to transfer.  Consider the incremental changes
	 * to used by each snapshot:
	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
	 * So each snapshot gave birth to:
	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
	 * So a sequence would look like:
	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
	 * Which simplifies to:
	 * uN + kN + kN-1 + ... + k1 + k0
	 * Note however, if we stop before we reach the ORIGIN we get:
	 * uN + kN + kN-1 + ... + kM - uM-1
	 */
	ss_mv_cnt = 0;
	ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes;
	ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes;
	ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes;
	for (snap = list_head(&ddpa->shared_snaps); snap;
	    snap = list_next(&ddpa->shared_snaps, snap)) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *ds = snap->ds;

		/* Count of snapshots that will move to the clone's dir. */
		ss_mv_cnt++;

		/*
		 * If there are long holds, we won't be able to evict
		 * the objset.
		 */
		if (dsl_dataset_long_held(ds)) {
			err = SET_ERROR(EBUSY);
			goto out;
		}

		/* Check that the snapshot name does not conflict */
		VERIFY0(dsl_dataset_get_snapname(ds));
		if (strlen(ds->ds_snapname) >= max_snap_len) {
			err = SET_ERROR(ENAMETOOLONG);
			goto out;
		}
		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
		if (err == 0) {
			/* Found: report the clashing name to the caller. */
			(void) strcpy(ddpa->err_ds, snap->ds->ds_snapname);
			err = SET_ERROR(EEXIST);
			goto out;
		}
		/* ENOENT is the expected "no conflict" answer. */
		if (err != ENOENT)
			goto out;

		/* The very first snapshot does not have a deadlist */
		if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0)
			continue;

		dsl_deadlist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp);
		ddpa->used += dlused;
		ddpa->comp += dlcomp;
		ddpa->uncomp += dluncomp;
	}

	/*
	 * If we are a clone of a clone then we never reached ORIGIN,
	 * so we need to subtract out the clone origin's used space.
	 */
	if (ddpa->origin_origin) {
		ddpa->used -=
		    dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes;
		ddpa->comp -=
		    dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes;
		ddpa->uncomp -=
		    dsl_dataset_phys(ddpa->origin_origin)->
		    ds_uncompressed_bytes;
	}

	/*
	 * Check that there is enough space and limit headroom here.
	 * This assignment also replaces the ENOENT left in err by the
	 * last successful loop iteration.
	 */
	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
	    0, ss_mv_cnt, ddpa->used, ddpa->cr);
	if (err != 0)
		goto out;

	/*
	 * Compute the amounts of space that will be used by snapshots
	 * after the promotion (for both origin and clone).  For each,
	 * it is the amount of space that will be on all of their
	 * deadlists (that was not born before their new origin).
	 */
	if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		uint64_t space;

		/*
		 * Note, typically this will not be a clone of a clone,
		 * so dd_origin_txg will be < TXG_INITIAL, so
		 * these snaplist_space() -> dsl_deadlist_space_range()
		 * calls will be fast because they do not have to
		 * iterate over all bps.
		 */
		snap = list_head(&ddpa->origin_snaps);
		err = snaplist_space(&ddpa->shared_snaps,
		    snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);
		if (err != 0)
			goto out;

		err = snaplist_space(&ddpa->clone_snaps,
		    snap->ds->ds_dir->dd_origin_txg, &space);
		if (err != 0)
			goto out;
		ddpa->cloneusedsnap += space;
	}
	if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags &
	    DD_FLAG_USED_BREAKDOWN) {
		err = snaplist_space(&ddpa->origin_snaps,
		    dsl_dataset_phys(origin_ds)->ds_creation_txg,
		    &ddpa->originusedsnap);
		if (err != 0)
			goto out;
	}

out:
	/* Success and failure both drop every hold taken by promote_hold(). */
	promote_rele(ddpa, FTAG);
	return (err);
}
2302 */ 2303 if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2304 uint64_t space; 2305 2306 /* 2307 * Note, typically this will not be a clone of a clone, 2308 * so dd_origin_txg will be < TXG_INITIAL, so 2309 * these snaplist_space() -> dsl_deadlist_space_range() 2310 * calls will be fast because they do not have to 2311 * iterate over all bps. 2312 */ 2313 snap = list_head(&ddpa->origin_snaps); 2314 err = snaplist_space(&ddpa->shared_snaps, 2315 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2316 if (err != 0) 2317 goto out; 2318 2319 err = snaplist_space(&ddpa->clone_snaps, 2320 snap->ds->ds_dir->dd_origin_txg, &space); 2321 if (err != 0) 2322 goto out; 2323 ddpa->cloneusedsnap += space; 2324 } 2325 if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags & 2326 DD_FLAG_USED_BREAKDOWN) { 2327 err = snaplist_space(&ddpa->origin_snaps, 2328 dsl_dataset_phys(origin_ds)->ds_creation_txg, 2329 &ddpa->originusedsnap); 2330 if (err != 0) 2331 goto out; 2332 } 2333 2334out: 2335 promote_rele(ddpa, FTAG); 2336 return (err); 2337} 2338 2339static void 2340dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2341{ 2342 dsl_dataset_promote_arg_t *ddpa = arg; 2343 dsl_pool_t *dp = dmu_tx_pool(tx); 2344 dsl_dataset_t *hds; 2345 struct promotenode *snap; 2346 dsl_dataset_t *origin_ds; 2347 dsl_dataset_t *origin_head; 2348 dsl_dir_t *dd; 2349 dsl_dir_t *odd = NULL; 2350 uint64_t oldnext_obj; 2351 int64_t delta; 2352#if defined(__FreeBSD__) && defined(_KERNEL) 2353 char *oldname, *newname; 2354#endif 2355 2356 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2357 hds = ddpa->ddpa_clone; 2358 2359 ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE); 2360 2361 snap = list_head(&ddpa->shared_snaps); 2362 origin_ds = snap->ds; 2363 dd = hds->ds_dir; 2364 2365 snap = list_head(&ddpa->origin_snaps); 2366 origin_head = snap->ds; 2367 2368 /* 2369 * We need to explicitly open odd, since origin_ds's dd will be 2370 * changing. 
2371 */ 2372 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2373 NULL, FTAG, &odd)); 2374 2375 /* change origin's next snap */ 2376 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2377 oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; 2378 snap = list_tail(&ddpa->clone_snaps); 2379 ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, 2380 origin_ds->ds_object); 2381 dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object; 2382 2383 /* change the origin's next clone */ 2384 if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) { 2385 dsl_dataset_remove_from_next_clones(origin_ds, 2386 snap->ds->ds_object, tx); 2387 VERIFY0(zap_add_int(dp->dp_meta_objset, 2388 dsl_dataset_phys(origin_ds)->ds_next_clones_obj, 2389 oldnext_obj, tx)); 2390 } 2391 2392 /* change origin */ 2393 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2394 ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object); 2395 dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj; 2396 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2397 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2398 dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object; 2399 origin_head->ds_dir->dd_origin_txg = 2400 dsl_dataset_phys(origin_ds)->ds_creation_txg; 2401 2402 /* change dd_clone entries */ 2403 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2404 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2405 dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx)); 2406 VERIFY0(zap_add_int(dp->dp_meta_objset, 2407 dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 2408 hds->ds_object, tx)); 2409 2410 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2411 dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 2412 origin_head->ds_object, tx)); 2413 if (dsl_dir_phys(dd)->dd_clones == 0) { 2414 dsl_dir_phys(dd)->dd_clones = 2415 zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, 2416 DMU_OT_NONE, 0, tx); 2417 } 2418 VERIFY0(zap_add_int(dp->dp_meta_objset, 2419 dsl_dir_phys(dd)->dd_clones, 
origin_head->ds_object, tx)); 2420 } 2421 2422#if defined(__FreeBSD__) && defined(_KERNEL) 2423 /* Take the spa_namespace_lock early so zvol renames don't deadlock. */ 2424 mutex_enter(&spa_namespace_lock); 2425 2426 oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2427 newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2428#endif 2429 2430 /* move snapshots to this dir */ 2431 for (snap = list_head(&ddpa->shared_snaps); snap; 2432 snap = list_next(&ddpa->shared_snaps, snap)) { 2433 dsl_dataset_t *ds = snap->ds; 2434 2435 /* 2436 * Property callbacks are registered to a particular 2437 * dsl_dir. Since ours is changing, evict the objset 2438 * so that they will be unregistered from the old dsl_dir. 2439 */ 2440 if (ds->ds_objset) { 2441 dmu_objset_evict(ds->ds_objset); 2442 ds->ds_objset = NULL; 2443 } 2444 2445 /* move snap name entry */ 2446 VERIFY0(dsl_dataset_get_snapname(ds)); 2447 VERIFY0(dsl_dataset_snap_remove(origin_head, 2448 ds->ds_snapname, tx, B_TRUE)); 2449 VERIFY0(zap_add(dp->dp_meta_objset, 2450 dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 2451 8, 1, &ds->ds_object, tx)); 2452 dsl_fs_ss_count_adjust(hds->ds_dir, 1, 2453 DD_FIELD_SNAPSHOT_COUNT, tx); 2454 2455 /* change containing dsl_dir */ 2456 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2457 ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object); 2458 dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object; 2459 ASSERT3P(ds->ds_dir, ==, odd); 2460 dsl_dir_rele(ds->ds_dir, ds); 2461 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2462 NULL, ds, &ds->ds_dir)); 2463 2464#if defined(__FreeBSD__) && defined(_KERNEL) 2465 dsl_dataset_name(ds, newname); 2466 zfsvfs_update_fromname(oldname, newname); 2467 zvol_rename_minors(oldname, newname); 2468#endif 2469 2470 /* move any clone references */ 2471 if (dsl_dataset_phys(ds)->ds_next_clones_obj && 2472 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2473 zap_cursor_t zc; 2474 zap_attribute_t za; 2475 2476 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2477 
dsl_dataset_phys(ds)->ds_next_clones_obj); 2478 zap_cursor_retrieve(&zc, &za) == 0; 2479 zap_cursor_advance(&zc)) { 2480 dsl_dataset_t *cnds; 2481 uint64_t o; 2482 2483 if (za.za_first_integer == oldnext_obj) { 2484 /* 2485 * We've already moved the 2486 * origin's reference. 2487 */ 2488 continue; 2489 } 2490 2491 VERIFY0(dsl_dataset_hold_obj(dp, 2492 za.za_first_integer, FTAG, &cnds)); 2493 o = dsl_dir_phys(cnds->ds_dir)-> 2494 dd_head_dataset_obj; 2495 2496 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2497 dsl_dir_phys(odd)->dd_clones, o, tx)); 2498 VERIFY0(zap_add_int(dp->dp_meta_objset, 2499 dsl_dir_phys(dd)->dd_clones, o, tx)); 2500 dsl_dataset_rele(cnds, FTAG); 2501 } 2502 zap_cursor_fini(&zc); 2503 } 2504 2505 ASSERT(!dsl_prop_hascb(ds)); 2506 } 2507 2508#if defined(__FreeBSD__) && defined(_KERNEL) 2509 mutex_exit(&spa_namespace_lock); 2510 2511 kmem_free(newname, MAXPATHLEN); 2512 kmem_free(oldname, MAXPATHLEN); 2513#endif 2514 /* 2515 * Change space accounting. 2516 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2517 * both be valid, or both be 0 (resulting in delta == 0). This 2518 * is true for each of {clone,origin} independently. 
2519 */ 2520 2521 delta = ddpa->cloneusedsnap - 2522 dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]; 2523 ASSERT3S(delta, >=, 0); 2524 ASSERT3U(ddpa->used, >=, delta); 2525 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2526 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2527 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2528 2529 delta = ddpa->originusedsnap - 2530 dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP]; 2531 ASSERT3S(delta, <=, 0); 2532 ASSERT3U(ddpa->used, >=, -delta); 2533 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2534 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2535 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2536 2537 dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique; 2538 2539 /* log history record */ 2540 spa_history_log_internal_ds(hds, "promote", tx, ""); 2541 2542 dsl_dir_rele(odd, FTAG); 2543 promote_rele(ddpa, FTAG); 2544} 2545 2546/* 2547 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2548 * (exclusive) and last_obj (inclusive). The list will be in reverse 2549 * order (last_obj will be the list_head()). If first_obj == 0, do all 2550 * snapshots back to this dataset's origin. 
2551 */ 2552static int 2553snaplist_make(dsl_pool_t *dp, 2554 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2555{ 2556 uint64_t obj = last_obj; 2557 2558 list_create(l, sizeof (struct promotenode), 2559 offsetof(struct promotenode, link)); 2560 2561 while (obj != first_obj) { 2562 dsl_dataset_t *ds; 2563 struct promotenode *snap; 2564 int err; 2565 2566 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2567 ASSERT(err != ENOENT); 2568 if (err != 0) 2569 return (err); 2570 2571 if (first_obj == 0) 2572 first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj; 2573 2574 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2575 snap->ds = ds; 2576 list_insert_tail(l, snap); 2577 obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 2578 } 2579 2580 return (0); 2581} 2582 2583static int 2584snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2585{ 2586 struct promotenode *snap; 2587 2588 *spacep = 0; 2589 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2590 uint64_t used, comp, uncomp; 2591 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2592 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2593 *spacep += used; 2594 } 2595 return (0); 2596} 2597 2598static void 2599snaplist_destroy(list_t *l, void *tag) 2600{ 2601 struct promotenode *snap; 2602 2603 if (l == NULL || !list_link_active(&l->list_head)) 2604 return; 2605 2606 while ((snap = list_tail(l)) != NULL) { 2607 list_remove(l, snap); 2608 dsl_dataset_rele(snap->ds, tag); 2609 kmem_free(snap, sizeof (*snap)); 2610 } 2611 list_destroy(l); 2612} 2613 2614static int 2615promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2616{ 2617 int error; 2618 dsl_dir_t *dd; 2619 struct promotenode *snap; 2620 2621 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2622 &ddpa->ddpa_clone); 2623 if (error != 0) 2624 return (error); 2625 dd = ddpa->ddpa_clone->ds_dir; 2626 2627 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2628 !dsl_dir_is_clone(dd)) { 2629 dsl_dataset_rele(ddpa->ddpa_clone, 
tag); 2630 return (SET_ERROR(EINVAL)); 2631 } 2632 2633 error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj, 2634 &ddpa->shared_snaps, tag); 2635 if (error != 0) 2636 goto out; 2637 2638 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2639 &ddpa->clone_snaps, tag); 2640 if (error != 0) 2641 goto out; 2642 2643 snap = list_head(&ddpa->shared_snaps); 2644 ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj); 2645 error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj, 2646 dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj, 2647 &ddpa->origin_snaps, tag); 2648 if (error != 0) 2649 goto out; 2650 2651 if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) { 2652 error = dsl_dataset_hold_obj(dp, 2653 dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj, 2654 tag, &ddpa->origin_origin); 2655 if (error != 0) 2656 goto out; 2657 } 2658out: 2659 if (error != 0) 2660 promote_rele(ddpa, tag); 2661 return (error); 2662} 2663 2664static void 2665promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2666{ 2667 snaplist_destroy(&ddpa->shared_snaps, tag); 2668 snaplist_destroy(&ddpa->clone_snaps, tag); 2669 snaplist_destroy(&ddpa->origin_snaps, tag); 2670 if (ddpa->origin_origin != NULL) 2671 dsl_dataset_rele(ddpa->origin_origin, tag); 2672 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2673} 2674 2675/* 2676 * Promote a clone. 2677 * 2678 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2679 * in with the name. (It must be at least MAXNAMELEN bytes long.) 2680 */ 2681int 2682dsl_dataset_promote(const char *name, char *conflsnap) 2683{ 2684 dsl_dataset_promote_arg_t ddpa = { 0 }; 2685 uint64_t numsnaps; 2686 int error; 2687 objset_t *os; 2688 2689 /* 2690 * We will modify space proportional to the number of 2691 * snapshots. Compute numsnaps. 
2692 */ 2693 error = dmu_objset_hold(name, FTAG, &os); 2694 if (error != 0) 2695 return (error); 2696 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2697 dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj, 2698 &numsnaps); 2699 dmu_objset_rele(os, FTAG); 2700 if (error != 0) 2701 return (error); 2702 2703 ddpa.ddpa_clonename = name; 2704 ddpa.err_ds = conflsnap; 2705 ddpa.cr = CRED(); 2706 2707 return (dsl_sync_task(name, dsl_dataset_promote_check, 2708 dsl_dataset_promote_sync, &ddpa, 2709 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED)); 2710} 2711 2712int 2713dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2714 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2715{ 2716 int64_t unused_refres_delta; 2717 2718 /* they should both be heads */ 2719 if (dsl_dataset_is_snapshot(clone) || 2720 dsl_dataset_is_snapshot(origin_head)) 2721 return (SET_ERROR(EINVAL)); 2722 2723 /* if we are not forcing, the branch point should be just before them */ 2724 if (!force && clone->ds_prev != origin_head->ds_prev) 2725 return (SET_ERROR(EINVAL)); 2726 2727 /* clone should be the clone (unless they are unrelated) */ 2728 if (clone->ds_prev != NULL && 2729 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2730 origin_head->ds_dir != clone->ds_prev->ds_dir) 2731 return (SET_ERROR(EINVAL)); 2732 2733 /* the clone should be a child of the origin */ 2734 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2735 return (SET_ERROR(EINVAL)); 2736 2737 /* origin_head shouldn't be modified unless 'force' */ 2738 if (!force && 2739 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 2740 return (SET_ERROR(ETXTBSY)); 2741 2742 /* origin_head should have no long holds (e.g. 
/*
 * Swap the contents of 'clone' and 'origin_head' in syncing context.
 *
 * Exchanges the two datasets' root block pointers, ds_*_bytes counters and
 * deadlists, and adjusts the owning dsl_dirs' space accounting (head,
 * snapshot and refreservation buckets) to match.  Both datasets must branch
 * from the same snapshot and the clone must have no refreservation; the
 * asserts below state these preconditions.
 *
 * The statement order matters: the deltas are computed from the pre-swap
 * values, and only then are the counters themselves exchanged.
 */
void
dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
    dsl_dataset_t *origin_head, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	int64_t unused_refres_delta;

	ASSERT(clone->ds_reserved == 0);
	/*
	 * NOTE(review): this compares the clone's ds_unique_bytes with the
	 * refquota, whereas dsl_dataset_clone_swap_check_impl() compares
	 * ds_referenced_bytes -- confirm which is intended.
	 */
	ASSERT(origin_head->ds_quota == 0 ||
	    dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota);
	ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);

	dmu_buf_will_dirty(clone->ds_dbuf, tx);
	dmu_buf_will_dirty(origin_head->ds_dbuf, tx);

	/* Evict any open objsets; their contents are about to be replaced. */
	if (clone->ds_objset != NULL) {
		dmu_objset_evict(clone->ds_objset);
		clone->ds_objset = NULL;
	}

	if (origin_head->ds_objset != NULL) {
		dmu_objset_evict(origin_head->ds_objset);
		origin_head->ds_objset = NULL;
	}

	/*
	 * Change in the head's unconsumed refreservation, computed from the
	 * pre-swap unique bytes and applied near the end of this function.
	 */
	unused_refres_delta =
	    (int64_t)MIN(origin_head->ds_reserved,
	    dsl_dataset_phys(origin_head)->ds_unique_bytes) -
	    (int64_t)MIN(origin_head->ds_reserved,
	    dsl_dataset_phys(clone)->ds_unique_bytes);

	/*
	 * Reset origin's unique bytes, if it exists.
	 */
	if (clone->ds_prev) {
		dsl_dataset_t *origin = clone->ds_prev;
		uint64_t comp, uncomp;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		dsl_deadlist_space_range(&clone->ds_deadlist,
		    dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX,
		    &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp);
	}

	/* swap blkptrs */
	{
		blkptr_t tmp;
		tmp = dsl_dataset_phys(origin_head)->ds_bp;
		dsl_dataset_phys(origin_head)->ds_bp =
		    dsl_dataset_phys(clone)->ds_bp;
		dsl_dataset_phys(clone)->ds_bp = tmp;
	}

	/* set dd_*_bytes */
	{
		int64_t dused, dcomp, duncomp;
		uint64_t cdl_used, cdl_comp, cdl_uncomp;
		uint64_t odl_used, odl_comp, odl_uncomp;

		ASSERT3U(dsl_dir_phys(clone->ds_dir)->
		    dd_used_breakdown[DD_USED_SNAP], ==, 0);

		dsl_deadlist_space(&clone->ds_deadlist,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space(&origin_head->ds_deadlist,
		    &odl_used, &odl_comp, &odl_uncomp);

		/*
		 * Each delta is (clone's referenced + deadlist space) minus
		 * (head's referenced + deadlist space).
		 */
		dused = dsl_dataset_phys(clone)->ds_referenced_bytes +
		    cdl_used -
		    (dsl_dataset_phys(origin_head)->ds_referenced_bytes +
		    odl_used);
		dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes +
		    cdl_comp -
		    (dsl_dataset_phys(origin_head)->ds_compressed_bytes +
		    odl_comp);
		duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes +
		    cdl_uncomp -
		    (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes +
		    odl_uncomp);

		/* The head's dir gains the delta; the clone's dir loses it. */
		dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD,
		    dused, dcomp, duncomp, tx);
		dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD,
		    -dused, -dcomp, -duncomp, tx);

		/*
		 * The difference in the space used by snapshots is the
		 * difference in snapshot space due to the head's
		 * deadlist (since that's the only thing that's
		 * changing that affects the snapused).
		 */
		dsl_deadlist_space_range(&clone->ds_deadlist,
		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space_range(&origin_head->ds_deadlist,
		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
		    &odl_used, &odl_comp, &odl_uncomp);
		dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used,
		    DD_USED_HEAD, DD_USED_SNAP, NULL);
	}

	/* swap ds_*_bytes */
	SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes,
	    dsl_dataset_phys(clone)->ds_referenced_bytes);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes,
	    dsl_dataset_phys(clone)->ds_compressed_bytes);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes,
	    dsl_dataset_phys(clone)->ds_uncompressed_bytes);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes,
	    dsl_dataset_phys(clone)->ds_unique_bytes);

	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV,
	    unused_refres_delta, 0, 0, tx);

	/*
	 * Swap deadlists.  Both are closed first, the on-disk object
	 * numbers are exchanged, and each is reopened on its new object.
	 */
	dsl_deadlist_close(&clone->ds_deadlist);
	dsl_deadlist_close(&origin_head->ds_deadlist);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj,
	    dsl_dataset_phys(clone)->ds_deadlist_obj);
	dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset,
	    dsl_dataset_phys(clone)->ds_deadlist_obj);
	dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,
	    dsl_dataset_phys(origin_head)->ds_deadlist_obj);

	dsl_scan_ds_clone_swapped(origin_head, clone, tx);

	spa_history_log_internal_ds(clone, "clone swap", tx,
	    "parent=%s", origin_head->ds_dir->dd_myname);
}
2903 */ 2904int 2905dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2906{ 2907 dsl_pool_t *dp; 2908 dsl_dataset_t *ds; 2909 int error; 2910 2911 error = dsl_pool_hold(pname, FTAG, &dp); 2912 if (error != 0) 2913 return (error); 2914 2915 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2916 if (error == 0) { 2917 dsl_dataset_name(ds, buf); 2918 dsl_dataset_rele(ds, FTAG); 2919 } 2920 dsl_pool_rele(dp, FTAG); 2921 2922 return (error); 2923} 2924 2925int 2926dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2927 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2928{ 2929 int error = 0; 2930 2931 ASSERT3S(asize, >, 0); 2932 2933 /* 2934 * *ref_rsrv is the portion of asize that will come from any 2935 * unconsumed refreservation space. 2936 */ 2937 *ref_rsrv = 0; 2938 2939 mutex_enter(&ds->ds_lock); 2940 /* 2941 * Make a space adjustment for reserved bytes. 2942 */ 2943 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { 2944 ASSERT3U(*used, >=, 2945 ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 2946 *used -= 2947 (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 2948 *ref_rsrv = 2949 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2950 } 2951 2952 if (!check_quota || ds->ds_quota == 0) { 2953 mutex_exit(&ds->ds_lock); 2954 return (0); 2955 } 2956 /* 2957 * If they are requesting more space, and our current estimate 2958 * is over quota, they get to try again unless the actual 2959 * on-disk is over quota and there are no pending changes (which 2960 * may free up space for us). 
2961 */ 2962 if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >= 2963 ds->ds_quota) { 2964 if (inflight > 0 || 2965 dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota) 2966 error = SET_ERROR(ERESTART); 2967 else 2968 error = SET_ERROR(EDQUOT); 2969 } 2970 mutex_exit(&ds->ds_lock); 2971 2972 return (error); 2973} 2974 2975typedef struct dsl_dataset_set_qr_arg { 2976 const char *ddsqra_name; 2977 zprop_source_t ddsqra_source; 2978 uint64_t ddsqra_value; 2979} dsl_dataset_set_qr_arg_t; 2980 2981 2982/* ARGSUSED */ 2983static int 2984dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2985{ 2986 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2987 dsl_pool_t *dp = dmu_tx_pool(tx); 2988 dsl_dataset_t *ds; 2989 int error; 2990 uint64_t newval; 2991 2992 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2993 return (SET_ERROR(ENOTSUP)); 2994 2995 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2996 if (error != 0) 2997 return (error); 2998 2999 if (dsl_dataset_is_snapshot(ds)) { 3000 dsl_dataset_rele(ds, FTAG); 3001 return (SET_ERROR(EINVAL)); 3002 } 3003 3004 error = dsl_prop_predict(ds->ds_dir, 3005 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 3006 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 3007 if (error != 0) { 3008 dsl_dataset_rele(ds, FTAG); 3009 return (error); 3010 } 3011 3012 if (newval == 0) { 3013 dsl_dataset_rele(ds, FTAG); 3014 return (0); 3015 } 3016 3017 if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes || 3018 newval < ds->ds_reserved) { 3019 dsl_dataset_rele(ds, FTAG); 3020 return (SET_ERROR(ENOSPC)); 3021 } 3022 3023 dsl_dataset_rele(ds, FTAG); 3024 return (0); 3025} 3026 3027static void 3028dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 3029{ 3030 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3031 dsl_pool_t *dp = dmu_tx_pool(tx); 3032 dsl_dataset_t *ds; 3033 uint64_t newval; 3034 3035 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3036 3037 dsl_prop_set_sync_impl(ds, 3038 
zfs_prop_to_name(ZFS_PROP_REFQUOTA), 3039 ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 3040 &ddsqra->ddsqra_value, tx); 3041 3042 VERIFY0(dsl_prop_get_int_ds(ds, 3043 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 3044 3045 if (ds->ds_quota != newval) { 3046 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3047 ds->ds_quota = newval; 3048 } 3049 dsl_dataset_rele(ds, FTAG); 3050} 3051 3052int 3053dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 3054 uint64_t refquota) 3055{ 3056 dsl_dataset_set_qr_arg_t ddsqra; 3057 3058 ddsqra.ddsqra_name = dsname; 3059 ddsqra.ddsqra_source = source; 3060 ddsqra.ddsqra_value = refquota; 3061 3062 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 3063 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE)); 3064} 3065 3066static int 3067dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 3068{ 3069 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3070 dsl_pool_t *dp = dmu_tx_pool(tx); 3071 dsl_dataset_t *ds; 3072 int error; 3073 uint64_t newval, unique; 3074 3075 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 3076 return (SET_ERROR(ENOTSUP)); 3077 3078 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 3079 if (error != 0) 3080 return (error); 3081 3082 if (dsl_dataset_is_snapshot(ds)) { 3083 dsl_dataset_rele(ds, FTAG); 3084 return (SET_ERROR(EINVAL)); 3085 } 3086 3087 error = dsl_prop_predict(ds->ds_dir, 3088 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3089 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 3090 if (error != 0) { 3091 dsl_dataset_rele(ds, FTAG); 3092 return (error); 3093 } 3094 3095 /* 3096 * If we are doing the preliminary check in open context, the 3097 * space estimates may be inaccurate. 
3098 */ 3099 if (!dmu_tx_is_syncing(tx)) { 3100 dsl_dataset_rele(ds, FTAG); 3101 return (0); 3102 } 3103 3104 mutex_enter(&ds->ds_lock); 3105 if (!DS_UNIQUE_IS_ACCURATE(ds)) 3106 dsl_dataset_recalc_head_uniq(ds); 3107 unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3108 mutex_exit(&ds->ds_lock); 3109 3110 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 3111 uint64_t delta = MAX(unique, newval) - 3112 MAX(unique, ds->ds_reserved); 3113 3114 if (delta > 3115 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 3116 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 3117 dsl_dataset_rele(ds, FTAG); 3118 return (SET_ERROR(ENOSPC)); 3119 } 3120 } 3121 3122 dsl_dataset_rele(ds, FTAG); 3123 return (0); 3124} 3125 3126void 3127dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 3128 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 3129{ 3130 uint64_t newval; 3131 uint64_t unique; 3132 int64_t delta; 3133 3134 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3135 source, sizeof (value), 1, &value, tx); 3136 3137 VERIFY0(dsl_prop_get_int_ds(ds, 3138 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 3139 3140 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3141 mutex_enter(&ds->ds_dir->dd_lock); 3142 mutex_enter(&ds->ds_lock); 3143 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3144 unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3145 delta = MAX(0, (int64_t)(newval - unique)) - 3146 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3147 ds->ds_reserved = newval; 3148 mutex_exit(&ds->ds_lock); 3149 3150 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3151 mutex_exit(&ds->ds_dir->dd_lock); 3152} 3153 3154static void 3155dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 3156{ 3157 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3158 dsl_pool_t *dp = dmu_tx_pool(tx); 3159 dsl_dataset_t *ds; 3160 3161 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3162 dsl_dataset_set_refreservation_sync_impl(ds, 3163 ddsqra->ddsqra_source, 
ddsqra->ddsqra_value, tx); 3164 dsl_dataset_rele(ds, FTAG); 3165} 3166 3167int 3168dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 3169 uint64_t refreservation) 3170{ 3171 dsl_dataset_set_qr_arg_t ddsqra; 3172 3173 ddsqra.ddsqra_name = dsname; 3174 ddsqra.ddsqra_source = source; 3175 ddsqra.ddsqra_value = refreservation; 3176 3177 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 3178 dsl_dataset_set_refreservation_sync, &ddsqra, 3179 0, ZFS_SPACE_CHECK_NONE)); 3180} 3181 3182/* 3183 * Return (in *usedp) the amount of space written in new that is not 3184 * present in oldsnap. New may be a snapshot or the head. Old must be 3185 * a snapshot before new, in new's filesystem (or its origin). If not then 3186 * fail and return EINVAL. 3187 * 3188 * The written space is calculated by considering two components: First, we 3189 * ignore any freed space, and calculate the written as new's used space 3190 * minus old's used space. Next, we add in the amount of space that was freed 3191 * between the two snapshots, thus reducing new's used space relative to old's. 3192 * Specifically, this is the space that was born before old->ds_creation_txg, 3193 * and freed before new (ie. on new's deadlist or a previous deadlist). 
3194 * 3195 * space freed [---------------------] 3196 * snapshots ---O-------O--------O-------O------ 3197 * oldsnap new 3198 */ 3199int 3200dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 3201 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3202{ 3203 int err = 0; 3204 uint64_t snapobj; 3205 dsl_pool_t *dp = new->ds_dir->dd_pool; 3206 3207 ASSERT(dsl_pool_config_held(dp)); 3208 3209 *usedp = 0; 3210 *usedp += dsl_dataset_phys(new)->ds_referenced_bytes; 3211 *usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes; 3212 3213 *compp = 0; 3214 *compp += dsl_dataset_phys(new)->ds_compressed_bytes; 3215 *compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes; 3216 3217 *uncompp = 0; 3218 *uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes; 3219 *uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes; 3220 3221 snapobj = new->ds_object; 3222 while (snapobj != oldsnap->ds_object) { 3223 dsl_dataset_t *snap; 3224 uint64_t used, comp, uncomp; 3225 3226 if (snapobj == new->ds_object) { 3227 snap = new; 3228 } else { 3229 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 3230 if (err != 0) 3231 break; 3232 } 3233 3234 if (dsl_dataset_phys(snap)->ds_prev_snap_txg == 3235 dsl_dataset_phys(oldsnap)->ds_creation_txg) { 3236 /* 3237 * The blocks in the deadlist can not be born after 3238 * ds_prev_snap_txg, so get the whole deadlist space, 3239 * which is more efficient (especially for old-format 3240 * deadlists). Unfortunately the deadlist code 3241 * doesn't have enough information to make this 3242 * optimization itself. 
3243 */ 3244 dsl_deadlist_space(&snap->ds_deadlist, 3245 &used, &comp, &uncomp); 3246 } else { 3247 dsl_deadlist_space_range(&snap->ds_deadlist, 3248 0, dsl_dataset_phys(oldsnap)->ds_creation_txg, 3249 &used, &comp, &uncomp); 3250 } 3251 *usedp += used; 3252 *compp += comp; 3253 *uncompp += uncomp; 3254 3255 /* 3256 * If we get to the beginning of the chain of snapshots 3257 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 3258 * was not a snapshot of/before new. 3259 */ 3260 snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj; 3261 if (snap != new) 3262 dsl_dataset_rele(snap, FTAG); 3263 if (snapobj == 0) { 3264 err = SET_ERROR(EINVAL); 3265 break; 3266 } 3267 3268 } 3269 return (err); 3270} 3271 3272/* 3273 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 3274 * lastsnap, and all snapshots in between are deleted. 3275 * 3276 * blocks that would be freed [---------------------------] 3277 * snapshots ---O-------O--------O-------O--------O 3278 * firstsnap lastsnap 3279 * 3280 * This is the set of blocks that were born after the snap before firstsnap, 3281 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 3282 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 3283 * We calculate this by iterating over the relevant deadlists (from the snap 3284 * after lastsnap, backward to the snap after firstsnap), summing up the 3285 * space on the deadlist that was born after the snap before firstsnap. 3286 */ 3287int 3288dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 3289 dsl_dataset_t *lastsnap, 3290 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3291{ 3292 int err = 0; 3293 uint64_t snapobj; 3294 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3295 3296 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 3297 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 3298 3299 /* 3300 * Check that the snapshots are in the same dsl_dir, and firstsnap 3301 * is before lastsnap. 
3302 */ 3303 if (firstsnap->ds_dir != lastsnap->ds_dir || 3304 dsl_dataset_phys(firstsnap)->ds_creation_txg > 3305 dsl_dataset_phys(lastsnap)->ds_creation_txg) 3306 return (SET_ERROR(EINVAL)); 3307 3308 *usedp = *compp = *uncompp = 0; 3309 3310 snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj; 3311 while (snapobj != firstsnap->ds_object) { 3312 dsl_dataset_t *ds; 3313 uint64_t used, comp, uncomp; 3314 3315 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3316 if (err != 0) 3317 break; 3318 3319 dsl_deadlist_space_range(&ds->ds_deadlist, 3320 dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX, 3321 &used, &comp, &uncomp); 3322 *usedp += used; 3323 *compp += comp; 3324 *uncompp += uncomp; 3325 3326 snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 3327 ASSERT3U(snapobj, !=, 0); 3328 dsl_dataset_rele(ds, FTAG); 3329 } 3330 return (err); 3331} 3332 3333static int 3334dsl_dataset_activate_large_blocks_check(void *arg, dmu_tx_t *tx) 3335{ 3336 const char *dsname = arg; 3337 dsl_dataset_t *ds; 3338 dsl_pool_t *dp = dmu_tx_pool(tx); 3339 int error = 0; 3340 3341 if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS)) 3342 return (SET_ERROR(ENOTSUP)); 3343 3344 ASSERT(spa_feature_is_enabled(dp->dp_spa, 3345 SPA_FEATURE_EXTENSIBLE_DATASET)); 3346 3347 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 3348 if (error != 0) 3349 return (error); 3350 3351 if (ds->ds_large_blocks) 3352 error = EALREADY; 3353 dsl_dataset_rele(ds, FTAG); 3354 3355 return (error); 3356} 3357 3358void 3359dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx) 3360{ 3361 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 3362 objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; 3363 uint64_t zero = 0; 3364 3365 spa_feature_incr(spa, SPA_FEATURE_LARGE_BLOCKS, tx); 3366 dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); 3367 3368 VERIFY0(zap_add(mos, dsobj, DS_FIELD_LARGE_BLOCKS, 3369 sizeof (zero), 1, &zero, tx)); 3370} 3371 3372static void 
3373dsl_dataset_activate_large_blocks_sync(void *arg, dmu_tx_t *tx) 3374{ 3375 const char *dsname = arg; 3376 dsl_dataset_t *ds; 3377 3378 VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), dsname, FTAG, &ds)); 3379 3380 dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx); 3381 ASSERT(!ds->ds_large_blocks); 3382 ds->ds_large_blocks = B_TRUE; 3383 dsl_dataset_rele(ds, FTAG); 3384} 3385 3386int 3387dsl_dataset_activate_large_blocks(const char *dsname) 3388{ 3389 int error; 3390 3391 error = dsl_sync_task(dsname, 3392 dsl_dataset_activate_large_blocks_check, 3393 dsl_dataset_activate_large_blocks_sync, (void *)dsname, 3394 1, ZFS_SPACE_CHECK_RESERVED); 3395 3396 /* 3397 * EALREADY indicates that this dataset already supports large blocks. 3398 */ 3399 if (error == EALREADY) 3400 error = 0; 3401 return (error); 3402} 3403 3404/* 3405 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3406 * For example, they could both be snapshots of the same filesystem, and 3407 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3408 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3409 * filesystem. Or 'earlier' could be the origin's origin. 3410 * 3411 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 
3412 */ 3413boolean_t 3414dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 3415 uint64_t earlier_txg) 3416{ 3417 dsl_pool_t *dp = later->ds_dir->dd_pool; 3418 int error; 3419 boolean_t ret; 3420 3421 ASSERT(dsl_pool_config_held(dp)); 3422 ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); 3423 3424 if (earlier_txg == 0) 3425 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; 3426 3427 if (dsl_dataset_is_snapshot(later) && 3428 earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) 3429 return (B_FALSE); 3430 3431 if (later->ds_dir == earlier->ds_dir) 3432 return (B_TRUE); 3433 if (!dsl_dir_is_clone(later->ds_dir)) 3434 return (B_FALSE); 3435 3436 if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object) 3437 return (B_TRUE); 3438 dsl_dataset_t *origin; 3439 error = dsl_dataset_hold_obj(dp, 3440 dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin); 3441 if (error != 0) 3442 return (B_FALSE); 3443 ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 3444 dsl_dataset_rele(origin, FTAG); 3445 return (ret); 3446} 3447 3448 3449void 3450dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 3451{ 3452 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3453 dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 3454} 3455