1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23262320Sdelphij * Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org> 24268659Sdelphij * Copyright (c) 2013, 2014 by Delphix. All rights reserved. 25265744Sdelphij * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26262320Sdelphij * Copyright (c) 2014 RackTop Systems. 
27168404Spjd */ 28168404Spjd 29168404Spjd#include <sys/dmu_objset.h> 30168404Spjd#include <sys/dsl_dataset.h> 31168404Spjd#include <sys/dsl_dir.h> 32168404Spjd#include <sys/dsl_prop.h> 33168404Spjd#include <sys/dsl_synctask.h> 34168404Spjd#include <sys/dmu_traverse.h> 35235222Smm#include <sys/dmu_impl.h> 36168404Spjd#include <sys/dmu_tx.h> 37168404Spjd#include <sys/arc.h> 38168404Spjd#include <sys/zio.h> 39168404Spjd#include <sys/zap.h> 40236884Smm#include <sys/zfeature.h> 41168404Spjd#include <sys/unique.h> 42168404Spjd#include <sys/zfs_context.h> 43168676Spjd#include <sys/zfs_ioctl.h> 44185029Spjd#include <sys/spa.h> 45185029Spjd#include <sys/zfs_znode.h> 46219089Spjd#include <sys/zfs_onexit.h> 47219089Spjd#include <sys/zvol.h> 48219089Spjd#include <sys/dsl_scan.h> 49219089Spjd#include <sys/dsl_deadlist.h> 50248571Smm#include <sys/dsl_destroy.h> 51248571Smm#include <sys/dsl_userhold.h> 52263407Sdelphij#include <sys/dsl_bookmark.h> 53168404Spjd 54219089Spjd#define SWITCH64(x, y) \ 55219089Spjd { \ 56219089Spjd uint64_t __tmp = (x); \ 57219089Spjd (x) = (y); \ 58219089Spjd (y) = __tmp; \ 59219089Spjd } 60219089Spjd 61168404Spjd#define DS_REF_MAX (1ULL << 62) 62168404Spjd 63168404Spjd#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 64168404Spjd 65168404Spjd/* 66185029Spjd * Figure out how much of this delta should be propogated to the dsl_dir 67185029Spjd * layer. If there's a refreservation, that space has already been 68185029Spjd * partially accounted for in our ancestors. 
69168404Spjd */ 70185029Spjdstatic int64_t 71185029Spjdparent_delta(dsl_dataset_t *ds, int64_t delta) 72185029Spjd{ 73185029Spjd uint64_t old_bytes, new_bytes; 74168404Spjd 75185029Spjd if (ds->ds_reserved == 0) 76185029Spjd return (delta); 77168404Spjd 78185029Spjd old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 79185029Spjd new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 80185029Spjd 81185029Spjd ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 82185029Spjd return (new_bytes - old_bytes); 83185029Spjd} 84185029Spjd 85168404Spjdvoid 86219089Spjddsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 87168404Spjd{ 88219089Spjd int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 89168404Spjd int compressed = BP_GET_PSIZE(bp); 90168404Spjd int uncompressed = BP_GET_UCSIZE(bp); 91185029Spjd int64_t delta; 92168404Spjd 93219089Spjd dprintf_bp(bp, "ds=%p", ds); 94168404Spjd 95168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 96168404Spjd /* It could have been compressed away to nothing */ 97168404Spjd if (BP_IS_HOLE(bp)) 98168404Spjd return; 99168404Spjd ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 100236884Smm ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 101168404Spjd if (ds == NULL) { 102239620Smm dsl_pool_mos_diduse_space(tx->tx_pool, 103239620Smm used, compressed, uncompressed); 104168404Spjd return; 105168404Spjd } 106254757Sdelphij 107168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 108168404Spjd mutex_enter(&ds->ds_lock); 109185029Spjd delta = parent_delta(ds, used); 110236884Smm ds->ds_phys->ds_referenced_bytes += used; 111168404Spjd ds->ds_phys->ds_compressed_bytes += compressed; 112168404Spjd ds->ds_phys->ds_uncompressed_bytes += uncompressed; 113168404Spjd ds->ds_phys->ds_unique_bytes += used; 114168404Spjd mutex_exit(&ds->ds_lock); 115185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 116185029Spjd compressed, uncompressed, tx); 117185029Spjd dsl_dir_transfer_space(ds->ds_dir, used - 
delta, 118185029Spjd DD_USED_REFRSRV, DD_USED_HEAD, tx); 119168404Spjd} 120168404Spjd 121185029Spjdint 122219089Spjddsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 123219089Spjd boolean_t async) 124168404Spjd{ 125263397Sdelphij int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 126263397Sdelphij int compressed = BP_GET_PSIZE(bp); 127263397Sdelphij int uncompressed = BP_GET_UCSIZE(bp); 128263397Sdelphij 129219089Spjd if (BP_IS_HOLE(bp)) 130219089Spjd return (0); 131219089Spjd 132219089Spjd ASSERT(dmu_tx_is_syncing(tx)); 133219089Spjd ASSERT(bp->blk_birth <= tx->tx_txg); 134219089Spjd 135168404Spjd if (ds == NULL) { 136219089Spjd dsl_free(tx->tx_pool, tx->tx_txg, bp); 137239620Smm dsl_pool_mos_diduse_space(tx->tx_pool, 138239620Smm -used, -compressed, -uncompressed); 139185029Spjd return (used); 140168404Spjd } 141168404Spjd ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 142168404Spjd 143185029Spjd ASSERT(!dsl_dataset_is_snapshot(ds)); 144168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 145168404Spjd 146168404Spjd if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 147185029Spjd int64_t delta; 148168404Spjd 149219089Spjd dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 150219089Spjd dsl_free(tx->tx_pool, tx->tx_txg, bp); 151168404Spjd 152168404Spjd mutex_enter(&ds->ds_lock); 153185029Spjd ASSERT(ds->ds_phys->ds_unique_bytes >= used || 154185029Spjd !DS_UNIQUE_IS_ACCURATE(ds)); 155185029Spjd delta = parent_delta(ds, -used); 156168404Spjd ds->ds_phys->ds_unique_bytes -= used; 157168404Spjd mutex_exit(&ds->ds_lock); 158185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 159185029Spjd delta, -compressed, -uncompressed, tx); 160185029Spjd dsl_dir_transfer_space(ds->ds_dir, -used - delta, 161185029Spjd DD_USED_REFRSRV, DD_USED_HEAD, tx); 162168404Spjd } else { 163168404Spjd dprintf_bp(bp, "putting on dead list: %s", ""); 164219089Spjd if (async) { 165219089Spjd /* 166219089Spjd * We are here as part of zio's write done callback, 
167219089Spjd * which means we're a zio interrupt thread. We can't 168219089Spjd * call dsl_deadlist_insert() now because it may block 169219089Spjd * waiting for I/O. Instead, put bp on the deferred 170219089Spjd * queue and let dsl_pool_sync() finish the job. 171219089Spjd */ 172219089Spjd bplist_append(&ds->ds_pending_deadlist, bp); 173219089Spjd } else { 174219089Spjd dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 175219089Spjd } 176185029Spjd ASSERT3U(ds->ds_prev->ds_object, ==, 177185029Spjd ds->ds_phys->ds_prev_snap_obj); 178185029Spjd ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 179168404Spjd /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 180185029Spjd if (ds->ds_prev->ds_phys->ds_next_snap_obj == 181185029Spjd ds->ds_object && bp->blk_birth > 182185029Spjd ds->ds_prev->ds_phys->ds_prev_snap_txg) { 183185029Spjd dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 184185029Spjd mutex_enter(&ds->ds_prev->ds_lock); 185185029Spjd ds->ds_prev->ds_phys->ds_unique_bytes += used; 186185029Spjd mutex_exit(&ds->ds_prev->ds_lock); 187168404Spjd } 188219089Spjd if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 189185029Spjd dsl_dir_transfer_space(ds->ds_dir, used, 190185029Spjd DD_USED_HEAD, DD_USED_SNAP, tx); 191185029Spjd } 192168404Spjd } 193168404Spjd mutex_enter(&ds->ds_lock); 194236884Smm ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used); 195236884Smm ds->ds_phys->ds_referenced_bytes -= used; 196168404Spjd ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 197168404Spjd ds->ds_phys->ds_compressed_bytes -= compressed; 198168404Spjd ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 199168404Spjd ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 200168404Spjd mutex_exit(&ds->ds_lock); 201185029Spjd 202185029Spjd return (used); 203168404Spjd} 204168404Spjd 205168404Spjduint64_t 206168404Spjddsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 207168404Spjd{ 208168404Spjd uint64_t trysnap = 0; 209168404Spjd 210168404Spjd if (ds == 
NULL) 211168404Spjd return (0); 212168404Spjd /* 213168404Spjd * The snapshot creation could fail, but that would cause an 214168404Spjd * incorrect FALSE return, which would only result in an 215168404Spjd * overestimation of the amount of space that an operation would 216168404Spjd * consume, which is OK. 217168404Spjd * 218168404Spjd * There's also a small window where we could miss a pending 219168404Spjd * snapshot, because we could set the sync task in the quiescing 220168404Spjd * phase. So this should only be used as a guess. 221168404Spjd */ 222168404Spjd if (ds->ds_trysnap_txg > 223168404Spjd spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 224168404Spjd trysnap = ds->ds_trysnap_txg; 225168404Spjd return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 226168404Spjd} 227168404Spjd 228209962Smmboolean_t 229219089Spjddsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 230219089Spjd uint64_t blk_birth) 231168404Spjd{ 232263397Sdelphij if (blk_birth <= dsl_dataset_prev_snap_txg(ds) || 233263397Sdelphij (bp != NULL && BP_IS_HOLE(bp))) 234219089Spjd return (B_FALSE); 235219089Spjd 236219089Spjd ddt_prefetch(dsl_dataset_get_spa(ds), bp); 237219089Spjd 238219089Spjd return (B_TRUE); 239168404Spjd} 240168404Spjd 241168404Spjd/* ARGSUSED */ 242168404Spjdstatic void 243168404Spjddsl_dataset_evict(dmu_buf_t *db, void *dsv) 244168404Spjd{ 245168404Spjd dsl_dataset_t *ds = dsv; 246168404Spjd 247248571Smm ASSERT(ds->ds_owner == NULL); 248168404Spjd 249185029Spjd unique_remove(ds->ds_fsid_guid); 250168404Spjd 251219089Spjd if (ds->ds_objset != NULL) 252219089Spjd dmu_objset_evict(ds->ds_objset); 253168404Spjd 254168404Spjd if (ds->ds_prev) { 255248571Smm dsl_dataset_rele(ds->ds_prev, ds); 256168404Spjd ds->ds_prev = NULL; 257168404Spjd } 258168404Spjd 259219089Spjd bplist_destroy(&ds->ds_pending_deadlist); 260248571Smm if (ds->ds_phys->ds_deadlist_obj != 0) 261219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 262185029Spjd if (ds->ds_dir) 263248571Smm 
dsl_dir_rele(ds->ds_dir, ds); 264168404Spjd 265185029Spjd ASSERT(!list_link_active(&ds->ds_synced_link)); 266168404Spjd 267185029Spjd if (mutex_owned(&ds->ds_lock)) 268185029Spjd mutex_exit(&ds->ds_lock); 269168404Spjd mutex_destroy(&ds->ds_lock); 270185029Spjd if (mutex_owned(&ds->ds_opening_lock)) 271185029Spjd mutex_exit(&ds->ds_opening_lock); 272185029Spjd mutex_destroy(&ds->ds_opening_lock); 273269218Sdelphij mutex_destroy(&ds->ds_sendstream_lock); 274248571Smm refcount_destroy(&ds->ds_longholds); 275168404Spjd 276168404Spjd kmem_free(ds, sizeof (dsl_dataset_t)); 277168404Spjd} 278168404Spjd 279248571Smmint 280168404Spjddsl_dataset_get_snapname(dsl_dataset_t *ds) 281168404Spjd{ 282168404Spjd dsl_dataset_phys_t *headphys; 283168404Spjd int err; 284168404Spjd dmu_buf_t *headdbuf; 285168404Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 286168404Spjd objset_t *mos = dp->dp_meta_objset; 287168404Spjd 288168404Spjd if (ds->ds_snapname[0]) 289168404Spjd return (0); 290168404Spjd if (ds->ds_phys->ds_next_snap_obj == 0) 291168404Spjd return (0); 292168404Spjd 293168404Spjd err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 294168404Spjd FTAG, &headdbuf); 295248571Smm if (err != 0) 296168404Spjd return (err); 297168404Spjd headphys = headdbuf->db_data; 298168404Spjd err = zap_value_search(dp->dp_meta_objset, 299185029Spjd headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 300168404Spjd dmu_buf_rele(headdbuf, FTAG); 301168404Spjd return (err); 302168404Spjd} 303168404Spjd 304248571Smmint 305185029Spjddsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 306168404Spjd{ 307185029Spjd objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 308185029Spjd uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 309185029Spjd matchtype_t mt; 310185029Spjd int err; 311185029Spjd 312185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 313185029Spjd mt = MT_FIRST; 314185029Spjd else 315185029Spjd mt = MT_EXACT; 316185029Spjd 
317185029Spjd err = zap_lookup_norm(mos, snapobj, name, 8, 1, 318185029Spjd value, mt, NULL, 0, NULL); 319185029Spjd if (err == ENOTSUP && mt == MT_FIRST) 320185029Spjd err = zap_lookup(mos, snapobj, name, 8, 1, value); 321185029Spjd return (err); 322185029Spjd} 323185029Spjd 324248571Smmint 325265744Sdelphijdsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, 326265744Sdelphij boolean_t adj_cnt) 327185029Spjd{ 328185029Spjd objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 329185029Spjd uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 330185029Spjd matchtype_t mt; 331185029Spjd int err; 332185029Spjd 333219089Spjd dsl_dir_snap_cmtime_update(ds->ds_dir); 334219089Spjd 335185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 336185029Spjd mt = MT_FIRST; 337185029Spjd else 338185029Spjd mt = MT_EXACT; 339185029Spjd 340185029Spjd err = zap_remove_norm(mos, snapobj, name, mt, tx); 341185029Spjd if (err == ENOTSUP && mt == MT_FIRST) 342185029Spjd err = zap_remove(mos, snapobj, name, tx); 343265744Sdelphij 344265744Sdelphij if (err == 0 && adj_cnt) 345265744Sdelphij dsl_fs_ss_count_adjust(ds->ds_dir, -1, 346265744Sdelphij DD_FIELD_SNAPSHOT_COUNT, tx); 347265744Sdelphij 348185029Spjd return (err); 349185029Spjd} 350185029Spjd 351248571Smmint 352248571Smmdsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 353185029Spjd dsl_dataset_t **dsp) 354185029Spjd{ 355168404Spjd objset_t *mos = dp->dp_meta_objset; 356168404Spjd dmu_buf_t *dbuf; 357168404Spjd dsl_dataset_t *ds; 358168404Spjd int err; 359219089Spjd dmu_object_info_t doi; 360168404Spjd 361248571Smm ASSERT(dsl_pool_config_held(dp)); 362168404Spjd 363168404Spjd err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 364248571Smm if (err != 0) 365168404Spjd return (err); 366219089Spjd 367219089Spjd /* Make sure dsobj has the correct object type. 
*/ 368219089Spjd dmu_object_info_from_db(dbuf, &doi); 369263390Sdelphij if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) { 370251632Sdelphij dmu_buf_rele(dbuf, tag); 371249195Smm return (SET_ERROR(EINVAL)); 372251632Sdelphij } 373219089Spjd 374168404Spjd ds = dmu_buf_get_user(dbuf); 375168404Spjd if (ds == NULL) { 376247187Smm dsl_dataset_t *winner = NULL; 377168404Spjd 378168404Spjd ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 379168404Spjd ds->ds_dbuf = dbuf; 380168404Spjd ds->ds_object = dsobj; 381168404Spjd ds->ds_phys = dbuf->db_data; 382168404Spjd 383168404Spjd mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 384185029Spjd mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 385235222Smm mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 386248571Smm refcount_create(&ds->ds_longholds); 387235222Smm 388219089Spjd bplist_create(&ds->ds_pending_deadlist); 389219089Spjd dsl_deadlist_open(&ds->ds_deadlist, 390168404Spjd mos, ds->ds_phys->ds_deadlist_obj); 391219089Spjd 392235222Smm list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 393235222Smm offsetof(dmu_sendarg_t, dsa_link)); 394235222Smm 395168404Spjd if (err == 0) { 396248571Smm err = dsl_dir_hold_obj(dp, 397168404Spjd ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 398168404Spjd } 399248571Smm if (err != 0) { 400168404Spjd mutex_destroy(&ds->ds_lock); 401185029Spjd mutex_destroy(&ds->ds_opening_lock); 402269218Sdelphij mutex_destroy(&ds->ds_sendstream_lock); 403248571Smm refcount_destroy(&ds->ds_longholds); 404219089Spjd bplist_destroy(&ds->ds_pending_deadlist); 405219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 406168404Spjd kmem_free(ds, sizeof (dsl_dataset_t)); 407168404Spjd dmu_buf_rele(dbuf, tag); 408168404Spjd return (err); 409168404Spjd } 410168404Spjd 411185029Spjd if (!dsl_dataset_is_snapshot(ds)) { 412168404Spjd ds->ds_snapname[0] = '\0'; 413248571Smm if (ds->ds_phys->ds_prev_snap_obj != 0) { 414248571Smm err = dsl_dataset_hold_obj(dp, 415185029Spjd 
ds->ds_phys->ds_prev_snap_obj, 416185029Spjd ds, &ds->ds_prev); 417168404Spjd } 418263407Sdelphij if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 419263407Sdelphij int zaperr = zap_lookup(mos, ds->ds_object, 420263407Sdelphij DS_FIELD_BOOKMARK_NAMES, 421263407Sdelphij sizeof (ds->ds_bookmarks), 1, 422263407Sdelphij &ds->ds_bookmarks); 423263407Sdelphij if (zaperr != ENOENT) 424263407Sdelphij VERIFY0(zaperr); 425263407Sdelphij } 426219089Spjd } else { 427219089Spjd if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 428219089Spjd err = dsl_dataset_get_snapname(ds); 429219089Spjd if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 430219089Spjd err = zap_count( 431219089Spjd ds->ds_dir->dd_pool->dp_meta_objset, 432219089Spjd ds->ds_phys->ds_userrefs_obj, 433219089Spjd &ds->ds_userrefs); 434168404Spjd } 435168404Spjd } 436168404Spjd 437185029Spjd if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 438248571Smm err = dsl_prop_get_int_ds(ds, 439248571Smm zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 440248571Smm &ds->ds_reserved); 441185029Spjd if (err == 0) { 442248571Smm err = dsl_prop_get_int_ds(ds, 443248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), 444248571Smm &ds->ds_quota); 445185029Spjd } 446185029Spjd } else { 447185029Spjd ds->ds_reserved = ds->ds_quota = 0; 448185029Spjd } 449185029Spjd 450247187Smm if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, 451247187Smm &ds->ds_phys, dsl_dataset_evict)) != NULL) { 452219089Spjd bplist_destroy(&ds->ds_pending_deadlist); 453219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 454185029Spjd if (ds->ds_prev) 455248571Smm dsl_dataset_rele(ds->ds_prev, ds); 456248571Smm dsl_dir_rele(ds->ds_dir, ds); 457168404Spjd mutex_destroy(&ds->ds_lock); 458185029Spjd mutex_destroy(&ds->ds_opening_lock); 459269218Sdelphij mutex_destroy(&ds->ds_sendstream_lock); 460248571Smm refcount_destroy(&ds->ds_longholds); 461168404Spjd kmem_free(ds, sizeof (dsl_dataset_t)); 462248571Smm if (err != 0) { 463168404Spjd dmu_buf_rele(dbuf, tag); 464168404Spjd return (err); 
465168404Spjd } 466168404Spjd ds = winner; 467168404Spjd } else { 468185029Spjd ds->ds_fsid_guid = 469168404Spjd unique_insert(ds->ds_phys->ds_fsid_guid); 470168404Spjd } 471168404Spjd } 472168404Spjd ASSERT3P(ds->ds_dbuf, ==, dbuf); 473168404Spjd ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 474185029Spjd ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 475185029Spjd spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 476185029Spjd dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 477168404Spjd *dsp = ds; 478168404Spjd return (0); 479168404Spjd} 480168404Spjd 481168404Spjdint 482248571Smmdsl_dataset_hold(dsl_pool_t *dp, const char *name, 483219089Spjd void *tag, dsl_dataset_t **dsp) 484185029Spjd{ 485168404Spjd dsl_dir_t *dd; 486185029Spjd const char *snapname; 487168404Spjd uint64_t obj; 488168404Spjd int err = 0; 489168404Spjd 490248571Smm err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 491248571Smm if (err != 0) 492168404Spjd return (err); 493168404Spjd 494248571Smm ASSERT(dsl_pool_config_held(dp)); 495168404Spjd obj = dd->dd_phys->dd_head_dataset_obj; 496248571Smm if (obj != 0) 497248571Smm err = dsl_dataset_hold_obj(dp, obj, tag, dsp); 498185029Spjd else 499249195Smm err = SET_ERROR(ENOENT); 500168404Spjd 501185029Spjd /* we may be looking for a snapshot */ 502185029Spjd if (err == 0 && snapname != NULL) { 503248571Smm dsl_dataset_t *ds; 504168404Spjd 505185029Spjd if (*snapname++ != '@') { 506185029Spjd dsl_dataset_rele(*dsp, tag); 507248571Smm dsl_dir_rele(dd, FTAG); 508249195Smm return (SET_ERROR(ENOENT)); 509168404Spjd } 510168404Spjd 511185029Spjd dprintf("looking for snapshot '%s'\n", snapname); 512185029Spjd err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 513185029Spjd if (err == 0) 514248571Smm err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 515185029Spjd dsl_dataset_rele(*dsp, tag); 516185029Spjd 517248571Smm if (err == 0) { 518185029Spjd mutex_enter(&ds->ds_lock); 519185029Spjd if (ds->ds_snapname[0] == 0) 520185029Spjd (void) 
strlcpy(ds->ds_snapname, snapname, 521185029Spjd sizeof (ds->ds_snapname)); 522185029Spjd mutex_exit(&ds->ds_lock); 523248571Smm *dsp = ds; 524168404Spjd } 525168404Spjd } 526248571Smm 527248571Smm dsl_dir_rele(dd, FTAG); 528168404Spjd return (err); 529168404Spjd} 530168404Spjd 531168404Spjdint 532248571Smmdsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 533219089Spjd void *tag, dsl_dataset_t **dsp) 534168404Spjd{ 535248571Smm int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 536248571Smm if (err != 0) 537185029Spjd return (err); 538248571Smm if (!dsl_dataset_tryown(*dsp, tag)) { 539219089Spjd dsl_dataset_rele(*dsp, tag); 540248571Smm *dsp = NULL; 541249195Smm return (SET_ERROR(EBUSY)); 542185029Spjd } 543185029Spjd return (0); 544168404Spjd} 545168404Spjd 546248571Smmint 547248571Smmdsl_dataset_own(dsl_pool_t *dp, const char *name, 548248571Smm void *tag, dsl_dataset_t **dsp) 549248571Smm{ 550248571Smm int err = dsl_dataset_hold(dp, name, tag, dsp); 551248571Smm if (err != 0) 552248571Smm return (err); 553248571Smm if (!dsl_dataset_tryown(*dsp, tag)) { 554248571Smm dsl_dataset_rele(*dsp, tag); 555249195Smm return (SET_ERROR(EBUSY)); 556248571Smm } 557248571Smm return (0); 558248571Smm} 559248571Smm 560248571Smm/* 561248571Smm * See the comment above dsl_pool_hold() for details. In summary, a long 562248571Smm * hold is used to prevent destruction of a dataset while the pool hold 563248571Smm * is dropped, allowing other concurrent operations (e.g. spa_sync()). 564248571Smm * 565248571Smm * The dataset and pool must be held when this function is called. After it 566248571Smm * is called, the pool hold may be released while the dataset is still held 567248571Smm * and accessed. 
568248571Smm */ 569168404Spjdvoid 570248571Smmdsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 571248571Smm{ 572248571Smm ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 573248571Smm (void) refcount_add(&ds->ds_longholds, tag); 574248571Smm} 575248571Smm 576248571Smmvoid 577248571Smmdsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 578248571Smm{ 579248571Smm (void) refcount_remove(&ds->ds_longholds, tag); 580248571Smm} 581248571Smm 582248571Smm/* Return B_TRUE if there are any long holds on this dataset. */ 583248571Smmboolean_t 584248571Smmdsl_dataset_long_held(dsl_dataset_t *ds) 585248571Smm{ 586248571Smm return (!refcount_is_zero(&ds->ds_longholds)); 587248571Smm} 588248571Smm 589248571Smmvoid 590168404Spjddsl_dataset_name(dsl_dataset_t *ds, char *name) 591168404Spjd{ 592168404Spjd if (ds == NULL) { 593168404Spjd (void) strcpy(name, "mos"); 594168404Spjd } else { 595168404Spjd dsl_dir_name(ds->ds_dir, name); 596248571Smm VERIFY0(dsl_dataset_get_snapname(ds)); 597168404Spjd if (ds->ds_snapname[0]) { 598168404Spjd (void) strcat(name, "@"); 599185029Spjd /* 600185029Spjd * We use a "recursive" mutex so that we 601185029Spjd * can call dprintf_ds() with ds_lock held. 
602185029Spjd */ 603168404Spjd if (!MUTEX_HELD(&ds->ds_lock)) { 604168404Spjd mutex_enter(&ds->ds_lock); 605168404Spjd (void) strcat(name, ds->ds_snapname); 606168404Spjd mutex_exit(&ds->ds_lock); 607168404Spjd } else { 608168404Spjd (void) strcat(name, ds->ds_snapname); 609168404Spjd } 610168404Spjd } 611168404Spjd } 612168404Spjd} 613168404Spjd 614168404Spjdvoid 615248571Smmdsl_dataset_rele(dsl_dataset_t *ds, void *tag) 616168404Spjd{ 617185029Spjd dmu_buf_rele(ds->ds_dbuf, tag); 618185029Spjd} 619185029Spjd 620185029Spjdvoid 621219089Spjddsl_dataset_disown(dsl_dataset_t *ds, void *tag) 622185029Spjd{ 623248571Smm ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL); 624185029Spjd 625168404Spjd mutex_enter(&ds->ds_lock); 626185029Spjd ds->ds_owner = NULL; 627168404Spjd mutex_exit(&ds->ds_lock); 628248571Smm dsl_dataset_long_rele(ds, tag); 629248571Smm if (ds->ds_dbuf != NULL) 630248571Smm dsl_dataset_rele(ds, tag); 631185029Spjd else 632219089Spjd dsl_dataset_evict(NULL, ds); 633185029Spjd} 634168404Spjd 635185029Spjdboolean_t 636248571Smmdsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 637185029Spjd{ 638185029Spjd boolean_t gotit = FALSE; 639185029Spjd 640185029Spjd mutex_enter(&ds->ds_lock); 641248571Smm if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 642219089Spjd ds->ds_owner = tag; 643248571Smm dsl_dataset_long_hold(ds, tag); 644185029Spjd gotit = TRUE; 645185029Spjd } 646185029Spjd mutex_exit(&ds->ds_lock); 647185029Spjd return (gotit); 648168404Spjd} 649168404Spjd 650185029Spjduint64_t 651185029Spjddsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 652185029Spjd uint64_t flags, dmu_tx_t *tx) 653185029Spjd{ 654185029Spjd dsl_pool_t *dp = dd->dd_pool; 655168404Spjd dmu_buf_t *dbuf; 656168404Spjd dsl_dataset_phys_t *dsphys; 657168404Spjd uint64_t dsobj; 658185029Spjd objset_t *mos = dp->dp_meta_objset; 659168404Spjd 660185029Spjd if (origin == NULL) 661185029Spjd origin = dp->dp_origin_snap; 662168404Spjd 663185029Spjd ASSERT(origin == NULL 
|| origin->ds_dir->dd_pool == dp); 664185029Spjd ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 665185029Spjd ASSERT(dmu_tx_is_syncing(tx)); 666185029Spjd ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 667185029Spjd 668168404Spjd dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 669168404Spjd DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 670248571Smm VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 671168404Spjd dmu_buf_will_dirty(dbuf, tx); 672168404Spjd dsphys = dbuf->db_data; 673185029Spjd bzero(dsphys, sizeof (dsl_dataset_phys_t)); 674168404Spjd dsphys->ds_dir_obj = dd->dd_object; 675185029Spjd dsphys->ds_flags = flags; 676168404Spjd dsphys->ds_fsid_guid = unique_create(); 677236823Spjd do { 678236823Spjd (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 679236823Spjd sizeof (dsphys->ds_guid)); 680236823Spjd } while (dsphys->ds_guid == 0); 681168404Spjd dsphys->ds_snapnames_zapobj = 682185029Spjd zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 683185029Spjd DMU_OT_NONE, 0, tx); 684168404Spjd dsphys->ds_creation_time = gethrestime_sec(); 685185029Spjd dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; 686185029Spjd 687219089Spjd if (origin == NULL) { 688219089Spjd dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 689219089Spjd } else { 690248571Smm dsl_dataset_t *ohds; /* head of the origin snapshot */ 691219089Spjd 692185029Spjd dsphys->ds_prev_snap_obj = origin->ds_object; 693185029Spjd dsphys->ds_prev_snap_txg = 694185029Spjd origin->ds_phys->ds_creation_txg; 695236884Smm dsphys->ds_referenced_bytes = 696236884Smm origin->ds_phys->ds_referenced_bytes; 697185029Spjd dsphys->ds_compressed_bytes = 698185029Spjd origin->ds_phys->ds_compressed_bytes; 699185029Spjd dsphys->ds_uncompressed_bytes = 700185029Spjd origin->ds_phys->ds_uncompressed_bytes; 701185029Spjd dsphys->ds_bp = origin->ds_phys->ds_bp; 702185029Spjd dsphys->ds_flags |= origin->ds_phys->ds_flags; 703185029Spjd 704185029Spjd dmu_buf_will_dirty(origin->ds_dbuf, tx); 705185029Spjd origin->ds_phys->ds_num_children++; 706185029Spjd 707248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 708219089Spjd origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); 709219089Spjd dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 710219089Spjd dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 711219089Spjd dsl_dataset_rele(ohds, FTAG); 712219089Spjd 713185029Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 714185029Spjd if (origin->ds_phys->ds_next_clones_obj == 0) { 715185029Spjd origin->ds_phys->ds_next_clones_obj = 716185029Spjd zap_create(mos, 717185029Spjd DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 718185029Spjd } 719248571Smm VERIFY0(zap_add_int(mos, 720248571Smm origin->ds_phys->ds_next_clones_obj, dsobj, tx)); 721185029Spjd } 722185029Spjd 723185029Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 724185029Spjd dd->dd_phys->dd_origin_obj = origin->ds_object; 725219089Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 726219089Spjd if (origin->ds_dir->dd_phys->dd_clones == 0) { 727219089Spjd dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 728219089Spjd 
origin->ds_dir->dd_phys->dd_clones = 729219089Spjd zap_create(mos, 730219089Spjd DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 731219089Spjd } 732248571Smm VERIFY0(zap_add_int(mos, 733219089Spjd origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); 734219089Spjd } 735185029Spjd } 736185029Spjd 737185029Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 738185029Spjd dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 739185029Spjd 740168404Spjd dmu_buf_rele(dbuf, FTAG); 741168404Spjd 742168404Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 743168404Spjd dd->dd_phys->dd_head_dataset_obj = dsobj; 744168404Spjd 745185029Spjd return (dsobj); 746168404Spjd} 747168404Spjd 748248571Smmstatic void 749248571Smmdsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 750248571Smm{ 751248571Smm objset_t *os; 752248571Smm 753248571Smm VERIFY0(dmu_objset_from_ds(ds, &os)); 754248571Smm bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 755248571Smm dsl_dataset_dirty(ds, tx); 756248571Smm} 757248571Smm 758168404Spjduint64_t 759185029Spjddsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 760185029Spjd dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 761168404Spjd{ 762168404Spjd dsl_pool_t *dp = pdd->dd_pool; 763168404Spjd uint64_t dsobj, ddobj; 764168404Spjd dsl_dir_t *dd; 765168404Spjd 766248571Smm ASSERT(dmu_tx_is_syncing(tx)); 767168404Spjd ASSERT(lastname[0] != '@'); 768168404Spjd 769185029Spjd ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 770248571Smm VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 771168404Spjd 772248571Smm dsobj = dsl_dataset_create_sync_dd(dd, origin, 773248571Smm flags & ~DS_CREATE_FLAG_NODIRTY, tx); 774168404Spjd 775185029Spjd dsl_deleg_set_create_perms(dd, tx, cr); 776168404Spjd 777265744Sdelphij /* 778265744Sdelphij * Since we're creating a new node we know it's a leaf, so we can 779265744Sdelphij * initialize the counts if the limit feature is active. 
780265744Sdelphij */ 781265744Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 782265744Sdelphij uint64_t cnt = 0; 783265744Sdelphij objset_t *os = dd->dd_pool->dp_meta_objset; 784265744Sdelphij 785265744Sdelphij dsl_dir_zapify(dd, tx); 786265744Sdelphij VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 787265744Sdelphij sizeof (cnt), 1, &cnt, tx)); 788265744Sdelphij VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 789265744Sdelphij sizeof (cnt), 1, &cnt, tx)); 790265744Sdelphij } 791265744Sdelphij 792248571Smm dsl_dir_rele(dd, FTAG); 793168404Spjd 794219089Spjd /* 795219089Spjd * If we are creating a clone, make sure we zero out any stale 796219089Spjd * data from the origin snapshots zil header. 797219089Spjd */ 798248571Smm if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 799219089Spjd dsl_dataset_t *ds; 800219089Spjd 801248571Smm VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 802248571Smm dsl_dataset_zero_zil(ds, tx); 803219089Spjd dsl_dataset_rele(ds, FTAG); 804219089Spjd } 805219089Spjd 806168404Spjd return (dsobj); 807168404Spjd} 808168404Spjd 809228103Smm#ifdef __FreeBSD__ 810228103Smm/* FreeBSD ioctl compat begin */ 811168404Spjdstruct destroyarg { 812228103Smm nvlist_t *nvl; 813228103Smm const char *snapname; 814168404Spjd}; 815168404Spjd 816168404Spjdstatic int 817228103Smmdsl_check_snap_cb(const char *name, void *arg) 818168404Spjd{ 819168404Spjd struct destroyarg *da = arg; 820168404Spjd dsl_dataset_t *ds; 821219089Spjd char *dsname; 822168404Spjd 823219089Spjd dsname = kmem_asprintf("%s@%s", name, da->snapname); 824248493Smm fnvlist_add_boolean(da->nvl, dsname); 825248493Smm kmem_free(dsname, strlen(dsname) + 1); 826219089Spjd 827228103Smm return (0); 828228103Smm} 829228103Smm 830228103Smmint 831248571Smmdmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, 832228103Smm nvlist_t *snaps) 833228103Smm{ 834228103Smm struct destroyarg *da; 835228103Smm int err; 836228103Smm 
837228103Smm da = kmem_zalloc(sizeof (struct destroyarg), KM_SLEEP); 838228103Smm da->nvl = snaps; 839228103Smm da->snapname = snapname; 840228103Smm err = dmu_objset_find(fsname, dsl_check_snap_cb, da, 841228103Smm DS_FIND_CHILDREN); 842228103Smm kmem_free(da, sizeof (struct destroyarg)); 843228103Smm 844185029Spjd return (err); 845168404Spjd} 846228103Smm/* FreeBSD ioctl compat end */ 847228103Smm#endif /* __FreeBSD__ */ 848168404Spjd 849168404Spjd/* 850185029Spjd * The unique space in the head dataset can be calculated by subtracting 851185029Spjd * the space used in the most recent snapshot, that is still being used 852185029Spjd * in this file system, from the space currently in use. To figure out 853185029Spjd * the space in the most recent snapshot still in use, we need to take 854185029Spjd * the total space used in the snapshot and subtract out the space that 855185029Spjd * has been freed up since the snapshot was taken. 856185029Spjd */ 857248571Smmvoid 858185029Spjddsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 859185029Spjd{ 860185029Spjd uint64_t mrs_used; 861185029Spjd uint64_t dlused, dlcomp, dluncomp; 862185029Spjd 863219089Spjd ASSERT(!dsl_dataset_is_snapshot(ds)); 864185029Spjd 865185029Spjd if (ds->ds_phys->ds_prev_snap_obj != 0) 866236884Smm mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; 867185029Spjd else 868185029Spjd mrs_used = 0; 869185029Spjd 870219089Spjd dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 871185029Spjd 872185029Spjd ASSERT3U(dlused, <=, mrs_used); 873185029Spjd ds->ds_phys->ds_unique_bytes = 874236884Smm ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); 875185029Spjd 876219089Spjd if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 877185029Spjd SPA_VERSION_UNIQUE_ACCURATE) 878185029Spjd ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 879185029Spjd} 880185029Spjd 881248571Smmvoid 882248571Smmdsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, 883219089Spjd dmu_tx_t *tx) 
884219089Spjd{ 885209962Smm objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 886209962Smm uint64_t count; 887209962Smm int err; 888209962Smm 889209962Smm ASSERT(ds->ds_phys->ds_num_children >= 2); 890209962Smm err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); 891209962Smm /* 892209962Smm * The err should not be ENOENT, but a bug in a previous version 893209962Smm * of the code could cause upgrade_clones_cb() to not set 894209962Smm * ds_next_snap_obj when it should, leading to a missing entry. 895209962Smm * If we knew that the pool was created after 896209962Smm * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 897209962Smm * ENOENT. However, at least we can check that we don't have 898209962Smm * too many entries in the next_clones_obj even after failing to 899209962Smm * remove this one. 900209962Smm */ 901248571Smm if (err != ENOENT) 902240415Smm VERIFY0(err); 903248571Smm ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 904209962Smm &count)); 905209962Smm ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); 906209962Smm} 907209962Smm 908248571Smm 909248571Smmblkptr_t * 910248571Smmdsl_dataset_get_blkptr(dsl_dataset_t *ds) 911219089Spjd{ 912248571Smm return (&ds->ds_phys->ds_bp); 913219089Spjd} 914219089Spjd 915248571Smmvoid 916248571Smmdsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 917219089Spjd{ 918248571Smm ASSERT(dmu_tx_is_syncing(tx)); 919248571Smm /* If it's the meta-objset, set dp_meta_rootbp */ 920248571Smm if (ds == NULL) { 921248571Smm tx->tx_pool->dp_meta_rootbp = *bp; 922219089Spjd } else { 923248571Smm dmu_buf_will_dirty(ds->ds_dbuf, tx); 924248571Smm ds->ds_phys->ds_bp = *bp; 925219089Spjd } 926219089Spjd} 927219089Spjd 928248571Smmspa_t * 929248571Smmdsl_dataset_get_spa(dsl_dataset_t *ds) 930219089Spjd{ 931248571Smm return (ds->ds_dir->dd_pool->dp_spa); 932219089Spjd} 933219089Spjd 934185029Spjdvoid 935248571Smmdsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 936185029Spjd{ 
937248571Smm dsl_pool_t *dp; 938168404Spjd 939248571Smm if (ds == NULL) /* this is the meta-objset */ 940219089Spjd return; 941219089Spjd 942248571Smm ASSERT(ds->ds_objset != NULL); 943185029Spjd 944248571Smm if (ds->ds_phys->ds_next_snap_obj != 0) 945248571Smm panic("dirtying snapshot!"); 946219089Spjd 947248571Smm dp = ds->ds_dir->dd_pool; 948219089Spjd 949248571Smm if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { 950248571Smm /* up the hold count until we can be written out */ 951248571Smm dmu_buf_add_ref(ds->ds_dbuf, ds); 952185029Spjd } 953248571Smm} 954185029Spjd 955248571Smmboolean_t 956248571Smmdsl_dataset_is_dirty(dsl_dataset_t *ds) 957248571Smm{ 958248571Smm for (int t = 0; t < TXG_SIZE; t++) { 959248571Smm if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 960248571Smm ds, t)) 961248571Smm return (B_TRUE); 962168404Spjd } 963248571Smm return (B_FALSE); 964185029Spjd} 965168404Spjd 966185029Spjdstatic int 967185029Spjddsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 968185029Spjd{ 969185029Spjd uint64_t asize; 970185029Spjd 971185029Spjd if (!dmu_tx_is_syncing(tx)) 972185029Spjd return (0); 973185029Spjd 974185029Spjd /* 975185029Spjd * If there's an fs-only reservation, any blocks that might become 976185029Spjd * owned by the snapshot dataset must be accommodated by space 977185029Spjd * outside of the reservation. 978185029Spjd */ 979219089Spjd ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 980219089Spjd asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 981219089Spjd if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 982249195Smm return (SET_ERROR(ENOSPC)); 983185029Spjd 984185029Spjd /* 985248571Smm * Propagate any reserved space for this snapshot to other 986185029Spjd * snapshot checks in this sync group. 
987185029Spjd */ 988185029Spjd if (asize > 0) 989185029Spjd dsl_dir_willuse_space(ds->ds_dir, asize, tx); 990185029Spjd 991185029Spjd return (0); 992168404Spjd} 993168404Spjd 994248571Smmtypedef struct dsl_dataset_snapshot_arg { 995248571Smm nvlist_t *ddsa_snaps; 996248571Smm nvlist_t *ddsa_props; 997248571Smm nvlist_t *ddsa_errors; 998265744Sdelphij cred_t *ddsa_cr; 999248571Smm} dsl_dataset_snapshot_arg_t; 1000248571Smm 1001168404Spjdint 1002248571Smmdsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, 1003265744Sdelphij dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr) 1004168404Spjd{ 1005248571Smm int error; 1006168404Spjd uint64_t value; 1007168404Spjd 1008248571Smm ds->ds_trysnap_txg = tx->tx_txg; 1009248571Smm 1010248571Smm if (!dmu_tx_is_syncing(tx)) 1011248571Smm return (0); 1012248571Smm 1013168404Spjd /* 1014168404Spjd * We don't allow multiple snapshots of the same txg. If there 1015168404Spjd * is already one, try again. 1016168404Spjd */ 1017168404Spjd if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1018249195Smm return (SET_ERROR(EAGAIN)); 1019168404Spjd 1020168404Spjd /* 1021248571Smm * Check for conflicting snapshot name. 1022168404Spjd */ 1023248571Smm error = dsl_dataset_snap_lookup(ds, snapname, &value); 1024248571Smm if (error == 0) 1025249195Smm return (SET_ERROR(EEXIST)); 1026248571Smm if (error != ENOENT) 1027248571Smm return (error); 1028168404Spjd 1029253819Sdelphij /* 1030253819Sdelphij * We don't allow taking snapshots of inconsistent datasets, such as 1031253819Sdelphij * those into which we are currently receiving. 
However, if we are 1032253819Sdelphij * creating this snapshot as part of a receive, this check will be 1033253819Sdelphij * executed atomically with respect to the completion of the receive 1034253819Sdelphij * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this 1035253819Sdelphij * case we ignore this, knowing it will be fixed up for us shortly in 1036253819Sdelphij * dmu_recv_end_sync(). 1037253819Sdelphij */ 1038253819Sdelphij if (!recv && DS_IS_INCONSISTENT(ds)) 1039253819Sdelphij return (SET_ERROR(EBUSY)); 1040253819Sdelphij 1041265744Sdelphij /* 1042265744Sdelphij * Skip the check for temporary snapshots or if we have already checked 1043265744Sdelphij * the counts in dsl_dataset_snapshot_check. This means we really only 1044265744Sdelphij * check the count here when we're receiving a stream. 1045265744Sdelphij */ 1046265744Sdelphij if (cnt != 0 && cr != NULL) { 1047265744Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1048265744Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr); 1049265744Sdelphij if (error != 0) 1050265744Sdelphij return (error); 1051265744Sdelphij } 1052265744Sdelphij 1053248571Smm error = dsl_dataset_snapshot_reserve_space(ds, tx); 1054248571Smm if (error != 0) 1055248571Smm return (error); 1056168498Spjd 1057168404Spjd return (0); 1058168404Spjd} 1059168404Spjd 1060248571Smmstatic int 1061248571Smmdsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) 1062248571Smm{ 1063248571Smm dsl_dataset_snapshot_arg_t *ddsa = arg; 1064248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1065248571Smm nvpair_t *pair; 1066248571Smm int rv = 0; 1067248571Smm 1068265744Sdelphij /* 1069265744Sdelphij * Pre-compute how many total new snapshots will be created for each 1070265744Sdelphij * level in the tree and below. This is needed for validating the 1071265744Sdelphij * snapshot limit when either taking a recursive snapshot or when 1072265744Sdelphij * taking multiple snapshots. 
1073265744Sdelphij * 1074265744Sdelphij * The problem is that the counts are not actually adjusted when 1075265744Sdelphij * we are checking, only when we finally sync. For a single snapshot, 1076265744Sdelphij * this is easy: the count will increase by 1 at each node up the tree, 1077265744Sdelphij * but it's more complicated for the recursive/multiple snapshot case. 1078265744Sdelphij * 1079265744Sdelphij * The dsl_fs_ss_limit_check function does recursively check the count 1080265744Sdelphij * at each level up the tree but since it is validating each snapshot 1081265744Sdelphij * independently we need to be sure that we are validating the complete 1082265744Sdelphij * count for the entire set of snapshots. We do this by rolling up the 1083265744Sdelphij * counts for each component of the name into an nvlist and then 1084265744Sdelphij * checking each of those cases with the aggregated count. 1085265744Sdelphij * 1086265744Sdelphij * This approach properly handles not only the recursive snapshot 1087265744Sdelphij * case (where we get all of those on the ddsa_snaps list) but also 1088265744Sdelphij * the sibling case (e.g. snapshot a/b and a/c so that we will also 1089265744Sdelphij * validate the limit on 'a' using a count of 2). 1090265744Sdelphij * 1091265744Sdelphij * We validate the snapshot names in the third loop and only report 1092265744Sdelphij * name errors once. 
1093265744Sdelphij */ 1094265744Sdelphij if (dmu_tx_is_syncing(tx)) { 1095265744Sdelphij nvlist_t *cnt_track = NULL; 1096265744Sdelphij cnt_track = fnvlist_alloc(); 1097265744Sdelphij 1098265744Sdelphij /* Rollup aggregated counts into the cnt_track list */ 1099265744Sdelphij for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1100265744Sdelphij pair != NULL; 1101265744Sdelphij pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1102265744Sdelphij char *pdelim; 1103265744Sdelphij uint64_t val; 1104265744Sdelphij char nm[MAXPATHLEN]; 1105265744Sdelphij 1106265744Sdelphij (void) strlcpy(nm, nvpair_name(pair), sizeof (nm)); 1107265744Sdelphij pdelim = strchr(nm, '@'); 1108265744Sdelphij if (pdelim == NULL) 1109265744Sdelphij continue; 1110265744Sdelphij *pdelim = '\0'; 1111265744Sdelphij 1112265744Sdelphij do { 1113265744Sdelphij if (nvlist_lookup_uint64(cnt_track, nm, 1114265744Sdelphij &val) == 0) { 1115265744Sdelphij /* update existing entry */ 1116265744Sdelphij fnvlist_add_uint64(cnt_track, nm, 1117265744Sdelphij val + 1); 1118265744Sdelphij } else { 1119265744Sdelphij /* add to list */ 1120265744Sdelphij fnvlist_add_uint64(cnt_track, nm, 1); 1121265744Sdelphij } 1122265744Sdelphij 1123265744Sdelphij pdelim = strrchr(nm, '/'); 1124265744Sdelphij if (pdelim != NULL) 1125265744Sdelphij *pdelim = '\0'; 1126265744Sdelphij } while (pdelim != NULL); 1127265744Sdelphij } 1128265744Sdelphij 1129265744Sdelphij /* Check aggregated counts at each level */ 1130265744Sdelphij for (pair = nvlist_next_nvpair(cnt_track, NULL); 1131265744Sdelphij pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) { 1132265744Sdelphij int error = 0; 1133265744Sdelphij char *name; 1134265744Sdelphij uint64_t cnt = 0; 1135265744Sdelphij dsl_dataset_t *ds; 1136265744Sdelphij 1137265744Sdelphij name = nvpair_name(pair); 1138265744Sdelphij cnt = fnvpair_value_uint64(pair); 1139265744Sdelphij ASSERT(cnt > 0); 1140265744Sdelphij 1141265744Sdelphij error = dsl_dataset_hold(dp, name, FTAG, 
&ds); 1142265744Sdelphij if (error == 0) { 1143265744Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1144265744Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, 1145265744Sdelphij ddsa->ddsa_cr); 1146265744Sdelphij dsl_dataset_rele(ds, FTAG); 1147265744Sdelphij } 1148265744Sdelphij 1149265744Sdelphij if (error != 0) { 1150265744Sdelphij if (ddsa->ddsa_errors != NULL) 1151265744Sdelphij fnvlist_add_int32(ddsa->ddsa_errors, 1152265744Sdelphij name, error); 1153265744Sdelphij rv = error; 1154265744Sdelphij /* only report one error for this check */ 1155265744Sdelphij break; 1156265744Sdelphij } 1157265744Sdelphij } 1158265744Sdelphij nvlist_free(cnt_track); 1159265744Sdelphij } 1160265744Sdelphij 1161248571Smm for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1162248571Smm pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1163248571Smm int error = 0; 1164248571Smm dsl_dataset_t *ds; 1165248571Smm char *name, *atp; 1166248571Smm char dsname[MAXNAMELEN]; 1167248571Smm 1168248571Smm name = nvpair_name(pair); 1169248571Smm if (strlen(name) >= MAXNAMELEN) 1170249195Smm error = SET_ERROR(ENAMETOOLONG); 1171248571Smm if (error == 0) { 1172248571Smm atp = strchr(name, '@'); 1173248571Smm if (atp == NULL) 1174249195Smm error = SET_ERROR(EINVAL); 1175248571Smm if (error == 0) 1176248571Smm (void) strlcpy(dsname, name, atp - name + 1); 1177248571Smm } 1178248571Smm if (error == 0) 1179248571Smm error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 1180248571Smm if (error == 0) { 1181265744Sdelphij /* passing 0/NULL skips dsl_fs_ss_limit_check */ 1182248571Smm error = dsl_dataset_snapshot_check_impl(ds, 1183265744Sdelphij atp + 1, tx, B_FALSE, 0, NULL); 1184248571Smm dsl_dataset_rele(ds, FTAG); 1185248571Smm } 1186248571Smm 1187248571Smm if (error != 0) { 1188248571Smm if (ddsa->ddsa_errors != NULL) { 1189248571Smm fnvlist_add_int32(ddsa->ddsa_errors, 1190248571Smm name, error); 1191248571Smm } 1192248571Smm rv = error; 1193248571Smm } 1194248571Smm } 
1195265744Sdelphij 1196248571Smm return (rv); 1197248571Smm} 1198248571Smm 1199168404Spjdvoid 1200248571Smmdsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, 1201248571Smm dmu_tx_t *tx) 1202168404Spjd{ 1203248571Smm static zil_header_t zero_zil; 1204248571Smm 1205168404Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 1206168404Spjd dmu_buf_t *dbuf; 1207168404Spjd dsl_dataset_phys_t *dsphys; 1208185029Spjd uint64_t dsobj, crtxg; 1209168404Spjd objset_t *mos = dp->dp_meta_objset; 1210248571Smm objset_t *os; 1211168404Spjd 1212248571Smm ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1213168404Spjd 1214185029Spjd /* 1215248571Smm * If we are on an old pool, the zil must not be active, in which 1216248571Smm * case it will be zeroed. Usually zil_suspend() accomplishes this. 1217248571Smm */ 1218248571Smm ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || 1219248571Smm dmu_objset_from_ds(ds, &os) != 0 || 1220248571Smm bcmp(&os->os_phys->os_zil_header, &zero_zil, 1221248571Smm sizeof (zero_zil)) == 0); 1222248571Smm 1223265744Sdelphij dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); 1224248571Smm 1225248571Smm /* 1226185029Spjd * The origin's ds_creation_txg has to be < TXG_INITIAL 1227185029Spjd */ 1228185029Spjd if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1229185029Spjd crtxg = 1; 1230185029Spjd else 1231185029Spjd crtxg = tx->tx_txg; 1232185029Spjd 1233168404Spjd dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1234168404Spjd DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1235248571Smm VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1236168404Spjd dmu_buf_will_dirty(dbuf, tx); 1237168404Spjd dsphys = dbuf->db_data; 1238185029Spjd bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1239168404Spjd dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1240168404Spjd dsphys->ds_fsid_guid = unique_create(); 1241236823Spjd do { 1242236823Spjd (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1243236823Spjd sizeof 
(dsphys->ds_guid)); 1244236823Spjd } while (dsphys->ds_guid == 0); 1245168404Spjd dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1246168404Spjd dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1247168404Spjd dsphys->ds_next_snap_obj = ds->ds_object; 1248168404Spjd dsphys->ds_num_children = 1; 1249168404Spjd dsphys->ds_creation_time = gethrestime_sec(); 1250185029Spjd dsphys->ds_creation_txg = crtxg; 1251168404Spjd dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1252236884Smm dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes; 1253168404Spjd dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1254168404Spjd dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1255168404Spjd dsphys->ds_flags = ds->ds_phys->ds_flags; 1256168404Spjd dsphys->ds_bp = ds->ds_phys->ds_bp; 1257168404Spjd dmu_buf_rele(dbuf, FTAG); 1258168404Spjd 1259168404Spjd ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1260168404Spjd if (ds->ds_prev) { 1261185029Spjd uint64_t next_clones_obj = 1262185029Spjd ds->ds_prev->ds_phys->ds_next_clones_obj; 1263168404Spjd ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1264168404Spjd ds->ds_object || 1265168404Spjd ds->ds_prev->ds_phys->ds_num_children > 1); 1266168404Spjd if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1267168404Spjd dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1268168404Spjd ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1269168404Spjd ds->ds_prev->ds_phys->ds_creation_txg); 1270168404Spjd ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1271185029Spjd } else if (next_clones_obj != 0) { 1272248571Smm dsl_dataset_remove_from_next_clones(ds->ds_prev, 1273209962Smm dsphys->ds_next_snap_obj, tx); 1274248571Smm VERIFY0(zap_add_int(mos, 1275185029Spjd next_clones_obj, dsobj, tx)); 1276168404Spjd } 1277168404Spjd } 1278168404Spjd 1279185029Spjd /* 1280185029Spjd * If we have a reference-reservation on this dataset, we will 1281185029Spjd * need to 
increase the amount of refreservation being charged 1282185029Spjd * since our unique space is going to zero. 1283185029Spjd */ 1284185029Spjd if (ds->ds_reserved) { 1285219089Spjd int64_t delta; 1286219089Spjd ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 1287219089Spjd delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 1288185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1289219089Spjd delta, 0, 0, tx); 1290185029Spjd } 1291185029Spjd 1292168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 1293219089Spjd ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, 1294219089Spjd UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); 1295219089Spjd dsl_deadlist_close(&ds->ds_deadlist); 1296219089Spjd dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 1297219089Spjd dsl_deadlist_add_key(&ds->ds_deadlist, 1298219089Spjd ds->ds_phys->ds_prev_snap_txg, tx); 1299219089Spjd 1300185029Spjd ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1301168404Spjd ds->ds_phys->ds_prev_snap_obj = dsobj; 1302185029Spjd ds->ds_phys->ds_prev_snap_txg = crtxg; 1303168404Spjd ds->ds_phys->ds_unique_bytes = 0; 1304185029Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1305185029Spjd ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1306168404Spjd 1307248571Smm VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1308248571Smm snapname, 8, 1, &dsobj, tx)); 1309168404Spjd 1310168404Spjd if (ds->ds_prev) 1311248571Smm dsl_dataset_rele(ds->ds_prev, ds); 1312248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 1313185029Spjd ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 1314185029Spjd 1315219089Spjd dsl_scan_ds_snapshotted(ds, tx); 1316185029Spjd 1317219089Spjd dsl_dir_snap_cmtime_update(ds->ds_dir); 1318219089Spjd 1319248571Smm spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); 1320168404Spjd} 1321168404Spjd 1322248571Smmstatic void 1323248571Smmdsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) 1324248571Smm{ 1325248571Smm 
dsl_dataset_snapshot_arg_t *ddsa = arg; 1326248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1327248571Smm nvpair_t *pair; 1328248571Smm 1329248571Smm for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1330248571Smm pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1331248571Smm dsl_dataset_t *ds; 1332248571Smm char *name, *atp; 1333248571Smm char dsname[MAXNAMELEN]; 1334248571Smm 1335248571Smm name = nvpair_name(pair); 1336248571Smm atp = strchr(name, '@'); 1337248571Smm (void) strlcpy(dsname, name, atp - name + 1); 1338248571Smm VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); 1339248571Smm 1340248571Smm dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); 1341248571Smm if (ddsa->ddsa_props != NULL) { 1342248571Smm dsl_props_set_sync_impl(ds->ds_prev, 1343248571Smm ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); 1344248571Smm } 1345248571Smm dsl_dataset_rele(ds, FTAG); 1346248571Smm } 1347248571Smm} 1348248571Smm 1349248571Smm/* 1350248571Smm * The snapshots must all be in the same pool. 1351248571Smm * All-or-nothing: if there are any failures, nothing will be modified. 
1352248571Smm */ 1353248571Smmint 1354248571Smmdsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) 1355248571Smm{ 1356248571Smm dsl_dataset_snapshot_arg_t ddsa; 1357248571Smm nvpair_t *pair; 1358248571Smm boolean_t needsuspend; 1359248571Smm int error; 1360248571Smm spa_t *spa; 1361248571Smm char *firstname; 1362248571Smm nvlist_t *suspended = NULL; 1363248571Smm 1364248571Smm pair = nvlist_next_nvpair(snaps, NULL); 1365248571Smm if (pair == NULL) 1366248571Smm return (0); 1367248571Smm firstname = nvpair_name(pair); 1368248571Smm 1369248571Smm error = spa_open(firstname, &spa, FTAG); 1370248571Smm if (error != 0) 1371248571Smm return (error); 1372248571Smm needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1373248571Smm spa_close(spa, FTAG); 1374248571Smm 1375248571Smm if (needsuspend) { 1376248571Smm suspended = fnvlist_alloc(); 1377248571Smm for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1378248571Smm pair = nvlist_next_nvpair(snaps, pair)) { 1379248571Smm char fsname[MAXNAMELEN]; 1380248571Smm char *snapname = nvpair_name(pair); 1381248571Smm char *atp; 1382248571Smm void *cookie; 1383248571Smm 1384248571Smm atp = strchr(snapname, '@'); 1385248571Smm if (atp == NULL) { 1386249195Smm error = SET_ERROR(EINVAL); 1387248571Smm break; 1388248571Smm } 1389248571Smm (void) strlcpy(fsname, snapname, atp - snapname + 1); 1390248571Smm 1391248571Smm error = zil_suspend(fsname, &cookie); 1392248571Smm if (error != 0) 1393248571Smm break; 1394248571Smm fnvlist_add_uint64(suspended, fsname, 1395248571Smm (uintptr_t)cookie); 1396248571Smm } 1397248571Smm } 1398248571Smm 1399248571Smm ddsa.ddsa_snaps = snaps; 1400248571Smm ddsa.ddsa_props = props; 1401248571Smm ddsa.ddsa_errors = errors; 1402265744Sdelphij ddsa.ddsa_cr = CRED(); 1403248571Smm 1404248571Smm if (error == 0) { 1405248571Smm error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, 1406248571Smm dsl_dataset_snapshot_sync, &ddsa, 1407269006Sdelphij 
fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL); 1408248571Smm } 1409248571Smm 1410248571Smm if (suspended != NULL) { 1411248571Smm for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; 1412248571Smm pair = nvlist_next_nvpair(suspended, pair)) { 1413248571Smm zil_resume((void *)(uintptr_t) 1414248571Smm fnvpair_value_uint64(pair)); 1415248571Smm } 1416248571Smm fnvlist_free(suspended); 1417248571Smm } 1418248571Smm 1419248571Smm#ifdef __FreeBSD__ 1420248571Smm#ifdef _KERNEL 1421248571Smm if (error == 0) { 1422248571Smm for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1423248571Smm pair = nvlist_next_nvpair(snaps, pair)) { 1424248571Smm char *snapname = nvpair_name(pair); 1425248571Smm zvol_create_minors(snapname); 1426248571Smm } 1427248571Smm } 1428248571Smm#endif 1429248571Smm#endif 1430248571Smm return (error); 1431248571Smm} 1432248571Smm 1433248571Smmtypedef struct dsl_dataset_snapshot_tmp_arg { 1434248571Smm const char *ddsta_fsname; 1435248571Smm const char *ddsta_snapname; 1436248571Smm minor_t ddsta_cleanup_minor; 1437248571Smm const char *ddsta_htag; 1438248571Smm} dsl_dataset_snapshot_tmp_arg_t; 1439248571Smm 1440248571Smmstatic int 1441248571Smmdsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) 1442248571Smm{ 1443248571Smm dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1444248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1445248571Smm dsl_dataset_t *ds; 1446248571Smm int error; 1447248571Smm 1448248571Smm error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); 1449248571Smm if (error != 0) 1450248571Smm return (error); 1451248571Smm 1452265744Sdelphij /* NULL cred means no limit check for tmp snapshot */ 1453253819Sdelphij error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, 1454265744Sdelphij tx, B_FALSE, 0, NULL); 1455248571Smm if (error != 0) { 1456248571Smm dsl_dataset_rele(ds, FTAG); 1457248571Smm return (error); 1458248571Smm } 1459248571Smm 1460248571Smm if (spa_version(dp->dp_spa) < 
SPA_VERSION_USERREFS) { 1461248571Smm dsl_dataset_rele(ds, FTAG); 1462249195Smm return (SET_ERROR(ENOTSUP)); 1463248571Smm } 1464248571Smm error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1465248571Smm B_TRUE, tx); 1466248571Smm if (error != 0) { 1467248571Smm dsl_dataset_rele(ds, FTAG); 1468248571Smm return (error); 1469248571Smm } 1470248571Smm 1471248571Smm dsl_dataset_rele(ds, FTAG); 1472248571Smm return (0); 1473248571Smm} 1474248571Smm 1475248571Smmstatic void 1476248571Smmdsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1477248571Smm{ 1478248571Smm dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1479248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1480248571Smm dsl_dataset_t *ds; 1481248571Smm 1482248571Smm VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1483248571Smm 1484248571Smm dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1485248571Smm dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1486248571Smm ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1487248571Smm dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1488248571Smm 1489248571Smm dsl_dataset_rele(ds, FTAG); 1490248571Smm} 1491248571Smm 1492248571Smmint 1493248571Smmdsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1494248571Smm minor_t cleanup_minor, const char *htag) 1495248571Smm{ 1496248571Smm dsl_dataset_snapshot_tmp_arg_t ddsta; 1497248571Smm int error; 1498248571Smm spa_t *spa; 1499248571Smm boolean_t needsuspend; 1500248571Smm void *cookie; 1501248571Smm 1502248571Smm ddsta.ddsta_fsname = fsname; 1503248571Smm ddsta.ddsta_snapname = snapname; 1504248571Smm ddsta.ddsta_cleanup_minor = cleanup_minor; 1505248571Smm ddsta.ddsta_htag = htag; 1506248571Smm 1507248571Smm error = spa_open(fsname, &spa, FTAG); 1508248571Smm if (error != 0) 1509248571Smm return (error); 1510248571Smm needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1511248571Smm spa_close(spa, FTAG); 1512248571Smm 1513248571Smm if 
(needsuspend) { 1514248571Smm error = zil_suspend(fsname, &cookie); 1515248571Smm if (error != 0) 1516248571Smm return (error); 1517248571Smm } 1518248571Smm 1519248571Smm error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1520269006Sdelphij dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED); 1521248571Smm 1522248571Smm if (needsuspend) 1523248571Smm zil_resume(cookie); 1524248571Smm return (error); 1525248571Smm} 1526248571Smm 1527248571Smm 1528168404Spjdvoid 1529168404Spjddsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1530168404Spjd{ 1531168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 1532219089Spjd ASSERT(ds->ds_objset != NULL); 1533168404Spjd ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1534168404Spjd 1535185029Spjd /* 1536185029Spjd * in case we had to change ds_fsid_guid when we opened it, 1537185029Spjd * sync it out now. 1538185029Spjd */ 1539185029Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 1540185029Spjd ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1541185029Spjd 1542219089Spjd dmu_objset_sync(ds->ds_objset, zio, tx); 1543168404Spjd} 1544168404Spjd 1545228103Smmstatic void 1546228103Smmget_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1547228103Smm{ 1548228103Smm uint64_t count = 0; 1549228103Smm objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1550228103Smm zap_cursor_t zc; 1551228103Smm zap_attribute_t za; 1552248571Smm nvlist_t *propval = fnvlist_alloc(); 1553248571Smm nvlist_t *val = fnvlist_alloc(); 1554228103Smm 1555248571Smm ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1556228103Smm 1557228103Smm /* 1558248571Smm * There may be missing entries in ds_next_clones_obj 1559228103Smm * due to a bug in a previous version of the code. 1560228103Smm * Only trust it if it has the right number of entries. 
1561228103Smm */ 1562228103Smm if (ds->ds_phys->ds_next_clones_obj != 0) { 1563262320Sdelphij VERIFY0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1564228103Smm &count)); 1565228103Smm } 1566248571Smm if (count != ds->ds_phys->ds_num_children - 1) 1567228103Smm goto fail; 1568228103Smm for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); 1569228103Smm zap_cursor_retrieve(&zc, &za) == 0; 1570228103Smm zap_cursor_advance(&zc)) { 1571228103Smm dsl_dataset_t *clone; 1572228103Smm char buf[ZFS_MAXNAMELEN]; 1573248571Smm VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1574248571Smm za.za_first_integer, FTAG, &clone)); 1575228103Smm dsl_dir_name(clone->ds_dir, buf); 1576248571Smm fnvlist_add_boolean(val, buf); 1577228103Smm dsl_dataset_rele(clone, FTAG); 1578228103Smm } 1579228103Smm zap_cursor_fini(&zc); 1580248571Smm fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1581248571Smm fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1582228103Smmfail: 1583228103Smm nvlist_free(val); 1584228103Smm nvlist_free(propval); 1585228103Smm} 1586228103Smm 1587168404Spjdvoid 1588168404Spjddsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1589168404Spjd{ 1590248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 1591223623Smm uint64_t refd, avail, uobjs, aobjs, ratio; 1592185029Spjd 1593248571Smm ASSERT(dsl_pool_config_held(dp)); 1594168404Spjd 1595248571Smm ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 
100 : 1596248571Smm (ds->ds_phys->ds_uncompressed_bytes * 100 / 1597248571Smm ds->ds_phys->ds_compressed_bytes); 1598248571Smm 1599248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1600248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1601248571Smm ds->ds_phys->ds_uncompressed_bytes); 1602248571Smm 1603248571Smm if (dsl_dataset_is_snapshot(ds)) { 1604248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1605248571Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1606248571Smm ds->ds_phys->ds_unique_bytes); 1607248571Smm get_clones_stat(ds, nv); 1608248571Smm } else { 1609268659Sdelphij if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { 1610268659Sdelphij char buf[MAXNAMELEN]; 1611268659Sdelphij dsl_dataset_name(ds->ds_prev, buf); 1612268659Sdelphij dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); 1613268659Sdelphij } 1614268659Sdelphij 1615248571Smm dsl_dir_stats(ds->ds_dir, nv); 1616248571Smm } 1617248571Smm 1618185029Spjd dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1619185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1620185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1621185029Spjd 1622168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1623168404Spjd ds->ds_phys->ds_creation_time); 1624168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1625168404Spjd ds->ds_phys->ds_creation_txg); 1626185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1627185029Spjd ds->ds_quota); 1628185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1629185029Spjd ds->ds_reserved); 1630185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1631185029Spjd ds->ds_phys->ds_guid); 1632219089Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1633219089Spjd ds->ds_phys->ds_unique_bytes); 1634219089Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1635219089Spjd ds->ds_object); 1636219089Spjd 
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1637219089Spjd ds->ds_userrefs); 1638219089Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1639219089Spjd DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 1640168404Spjd 1641228103Smm if (ds->ds_phys->ds_prev_snap_obj != 0) { 1642228103Smm uint64_t written, comp, uncomp; 1643228103Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 1644228103Smm dsl_dataset_t *prev; 1645228103Smm 1646228103Smm int err = dsl_dataset_hold_obj(dp, 1647228103Smm ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); 1648228103Smm if (err == 0) { 1649228103Smm err = dsl_dataset_space_written(prev, ds, &written, 1650228103Smm &comp, &uncomp); 1651228103Smm dsl_dataset_rele(prev, FTAG); 1652228103Smm if (err == 0) { 1653228103Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1654228103Smm written); 1655228103Smm } 1656228103Smm } 1657228103Smm } 1658168404Spjd} 1659168404Spjd 1660168404Spjdvoid 1661168404Spjddsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1662168404Spjd{ 1663248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 1664248571Smm ASSERT(dsl_pool_config_held(dp)); 1665248571Smm 1666168404Spjd stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1667168404Spjd stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1668185029Spjd stat->dds_guid = ds->ds_phys->ds_guid; 1669248571Smm stat->dds_origin[0] = '\0'; 1670248571Smm if (dsl_dataset_is_snapshot(ds)) { 1671168404Spjd stat->dds_is_snapshot = B_TRUE; 1672168404Spjd stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1673209962Smm } else { 1674209962Smm stat->dds_is_snapshot = B_FALSE; 1675209962Smm stat->dds_num_clones = 0; 1676168404Spjd 1677248571Smm if (dsl_dir_is_clone(ds->ds_dir)) { 1678248571Smm dsl_dataset_t *ods; 1679168404Spjd 1680248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 1681248571Smm ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 1682248571Smm dsl_dataset_name(ods, stat->dds_origin); 1683248571Smm dsl_dataset_rele(ods, FTAG); 1684248571Smm } 
1685168404Spjd } 1686168404Spjd} 1687168404Spjd 1688168404Spjduint64_t 1689168404Spjddsl_dataset_fsid_guid(dsl_dataset_t *ds) 1690168404Spjd{ 1691185029Spjd return (ds->ds_fsid_guid); 1692168404Spjd} 1693168404Spjd 1694168404Spjdvoid 1695168404Spjddsl_dataset_space(dsl_dataset_t *ds, 1696168404Spjd uint64_t *refdbytesp, uint64_t *availbytesp, 1697168404Spjd uint64_t *usedobjsp, uint64_t *availobjsp) 1698168404Spjd{ 1699236884Smm *refdbytesp = ds->ds_phys->ds_referenced_bytes; 1700168404Spjd *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1701185029Spjd if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1702185029Spjd *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1703185029Spjd if (ds->ds_quota != 0) { 1704185029Spjd /* 1705185029Spjd * Adjust available bytes according to refquota 1706185029Spjd */ 1707185029Spjd if (*refdbytesp < ds->ds_quota) 1708185029Spjd *availbytesp = MIN(*availbytesp, 1709185029Spjd ds->ds_quota - *refdbytesp); 1710185029Spjd else 1711185029Spjd *availbytesp = 0; 1712185029Spjd } 1713268649Sdelphij *usedobjsp = BP_GET_FILL(&ds->ds_phys->ds_bp); 1714168404Spjd *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1715168404Spjd} 1716168404Spjd 1717185029Spjdboolean_t 1718253820Sdelphijdsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) 1719185029Spjd{ 1720185029Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 1721185029Spjd 1722248571Smm ASSERT(dsl_pool_config_held(dp)); 1723253820Sdelphij if (snap == NULL) 1724185029Spjd return (B_FALSE); 1725185029Spjd if (ds->ds_phys->ds_bp.blk_birth > 1726253820Sdelphij snap->ds_phys->ds_creation_txg) { 1727253820Sdelphij objset_t *os, *os_snap; 1728219089Spjd /* 1729219089Spjd * It may be that only the ZIL differs, because it was 1730219089Spjd * reset in the head. Don't count that as being 1731219089Spjd * modified. 
1732219089Spjd */ 1733219089Spjd if (dmu_objset_from_ds(ds, &os) != 0) 1734219089Spjd return (B_TRUE); 1735253820Sdelphij if (dmu_objset_from_ds(snap, &os_snap) != 0) 1736219089Spjd return (B_TRUE); 1737219089Spjd return (bcmp(&os->os_phys->os_meta_dnode, 1738253820Sdelphij &os_snap->os_phys->os_meta_dnode, 1739219089Spjd sizeof (os->os_phys->os_meta_dnode)) != 0); 1740219089Spjd } 1741185029Spjd return (B_FALSE); 1742185029Spjd} 1743185029Spjd 1744248571Smmtypedef struct dsl_dataset_rename_snapshot_arg { 1745248571Smm const char *ddrsa_fsname; 1746248571Smm const char *ddrsa_oldsnapname; 1747248571Smm const char *ddrsa_newsnapname; 1748248571Smm boolean_t ddrsa_recursive; 1749248571Smm dmu_tx_t *ddrsa_tx; 1750248571Smm} dsl_dataset_rename_snapshot_arg_t; 1751248571Smm 1752168404Spjd/* ARGSUSED */ 1753168404Spjdstatic int 1754248571Smmdsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 1755248571Smm dsl_dataset_t *hds, void *arg) 1756168404Spjd{ 1757248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1758248571Smm int error; 1759168404Spjd uint64_t val; 1760168404Spjd 1761248571Smm error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1762248571Smm if (error != 0) { 1763248571Smm /* ignore nonexistent snapshots */ 1764248571Smm return (error == ENOENT ? 
0 : error); 1765248571Smm } 1766168404Spjd 1767248571Smm /* new name should not exist */ 1768248571Smm error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 1769248571Smm if (error == 0) 1770249195Smm error = SET_ERROR(EEXIST); 1771248571Smm else if (error == ENOENT) 1772248571Smm error = 0; 1773168404Spjd 1774168676Spjd /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1775248571Smm if (dsl_dir_namelen(hds->ds_dir) + 1 + 1776248571Smm strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) 1777249195Smm error = SET_ERROR(ENAMETOOLONG); 1778168676Spjd 1779248571Smm return (error); 1780168404Spjd} 1781168404Spjd 1782248571Smmstatic int 1783248571Smmdsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 1784168404Spjd{ 1785248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1786248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1787168404Spjd dsl_dataset_t *hds; 1788248571Smm int error; 1789168404Spjd 1790248571Smm error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 1791248571Smm if (error != 0) 1792248571Smm return (error); 1793168404Spjd 1794248571Smm if (ddrsa->ddrsa_recursive) { 1795248571Smm error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1796248571Smm dsl_dataset_rename_snapshot_check_impl, ddrsa, 1797248571Smm DS_FIND_CHILDREN); 1798248571Smm } else { 1799248571Smm error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 1800248571Smm } 1801248571Smm dsl_dataset_rele(hds, FTAG); 1802248571Smm return (error); 1803248571Smm} 1804168404Spjd 1805248571Smmstatic int 1806248571Smmdsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 1807248571Smm dsl_dataset_t *hds, void *arg) 1808248571Smm{ 1809248571Smm#ifdef __FreeBSD__ 1810248571Smm#ifdef _KERNEL 1811248571Smm char *oldname, *newname; 1812248571Smm#endif 1813248571Smm#endif 1814248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1815248571Smm dsl_dataset_t *ds; 1816248571Smm uint64_t val; 1817248571Smm dmu_tx_t *tx = ddrsa->ddrsa_tx; 1818248571Smm int 
error; 1819248571Smm 1820248571Smm error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1821248571Smm ASSERT(error == 0 || error == ENOENT); 1822248571Smm if (error == ENOENT) { 1823248571Smm /* ignore nonexistent snapshots */ 1824248571Smm return (0); 1825248571Smm } 1826248571Smm 1827248571Smm VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 1828248571Smm 1829248571Smm /* log before we change the name */ 1830248571Smm spa_history_log_internal_ds(ds, "rename", tx, 1831248571Smm "-> @%s", ddrsa->ddrsa_newsnapname); 1832248571Smm 1833265744Sdelphij VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx, 1834265744Sdelphij B_FALSE)); 1835168404Spjd mutex_enter(&ds->ds_lock); 1836248571Smm (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 1837168404Spjd mutex_exit(&ds->ds_lock); 1838248571Smm VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, 1839248571Smm ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 1840248571Smm 1841248571Smm#ifdef __FreeBSD__ 1842219320Spjd#ifdef _KERNEL 1843248571Smm oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1844248571Smm newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1845248571Smm snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1846248571Smm ddrsa->ddrsa_oldsnapname); 1847248571Smm snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, 1848248571Smm ddrsa->ddrsa_newsnapname); 1849248571Smm zfsvfs_update_fromname(oldname, newname); 1850219317Spjd zvol_rename_minors(oldname, newname); 1851248571Smm kmem_free(newname, MAXPATHLEN); 1852248571Smm kmem_free(oldname, MAXPATHLEN); 1853219320Spjd#endif 1854248571Smm#endif 1855248571Smm dsl_dataset_rele(ds, FTAG); 1856168404Spjd 1857248571Smm return (0); 1858168404Spjd} 1859168404Spjd 1860248571Smmstatic void 1861248571Smmdsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 1862168676Spjd{ 1863248571Smm dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1864248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1865248571Smm dsl_dataset_t 
*hds; 1866168676Spjd 1867248571Smm VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 1868248571Smm ddrsa->ddrsa_tx = tx; 1869248571Smm if (ddrsa->ddrsa_recursive) { 1870248571Smm VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1871248571Smm dsl_dataset_rename_snapshot_sync_impl, ddrsa, 1872248571Smm DS_FIND_CHILDREN)); 1873248571Smm } else { 1874248571Smm VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 1875168676Spjd } 1876248571Smm dsl_dataset_rele(hds, FTAG); 1877248571Smm} 1878168676Spjd 1879248571Smmint 1880248571Smmdsl_dataset_rename_snapshot(const char *fsname, 1881248571Smm const char *oldsnapname, const char *newsnapname, boolean_t recursive) 1882248571Smm{ 1883248571Smm dsl_dataset_rename_snapshot_arg_t ddrsa; 1884168676Spjd 1885248571Smm ddrsa.ddrsa_fsname = fsname; 1886248571Smm ddrsa.ddrsa_oldsnapname = oldsnapname; 1887248571Smm ddrsa.ddrsa_newsnapname = newsnapname; 1888248571Smm ddrsa.ddrsa_recursive = recursive; 1889168676Spjd 1890248571Smm return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 1891269006Sdelphij dsl_dataset_rename_snapshot_sync, &ddrsa, 1892269006Sdelphij 1, ZFS_SPACE_CHECK_RESERVED)); 1893168676Spjd} 1894168676Spjd 1895253816Sdelphij/* 1896253816Sdelphij * If we're doing an ownership handoff, we need to make sure that there is 1897253816Sdelphij * only one long hold on the dataset. We're not allowed to change anything here 1898253816Sdelphij * so we don't permanently release the long hold or regular hold here. We want 1899253816Sdelphij * to do this only when syncing to avoid the dataset unexpectedly going away 1900253816Sdelphij * when we release the long hold. 
1901253816Sdelphij */ 1902168676Spjdstatic int 1903253816Sdelphijdsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1904253816Sdelphij{ 1905253816Sdelphij boolean_t held; 1906253816Sdelphij 1907253816Sdelphij if (!dmu_tx_is_syncing(tx)) 1908253816Sdelphij return (0); 1909253816Sdelphij 1910253816Sdelphij if (owner != NULL) { 1911253816Sdelphij VERIFY3P(ds->ds_owner, ==, owner); 1912253816Sdelphij dsl_dataset_long_rele(ds, owner); 1913253816Sdelphij } 1914253816Sdelphij 1915253816Sdelphij held = dsl_dataset_long_held(ds); 1916253816Sdelphij 1917253816Sdelphij if (owner != NULL) 1918253816Sdelphij dsl_dataset_long_hold(ds, owner); 1919253816Sdelphij 1920253816Sdelphij if (held) 1921253816Sdelphij return (SET_ERROR(EBUSY)); 1922253816Sdelphij 1923253816Sdelphij return (0); 1924253816Sdelphij} 1925253816Sdelphij 1926253816Sdelphijtypedef struct dsl_dataset_rollback_arg { 1927253816Sdelphij const char *ddra_fsname; 1928253816Sdelphij void *ddra_owner; 1929254587Sdelphij nvlist_t *ddra_result; 1930253816Sdelphij} dsl_dataset_rollback_arg_t; 1931253816Sdelphij 1932253816Sdelphijstatic int 1933248571Smmdsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 1934168676Spjd{ 1935253816Sdelphij dsl_dataset_rollback_arg_t *ddra = arg; 1936248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1937248571Smm dsl_dataset_t *ds; 1938248571Smm int64_t unused_refres_delta; 1939248571Smm int error; 1940168676Spjd 1941253816Sdelphij error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 1942248571Smm if (error != 0) 1943248571Smm return (error); 1944168676Spjd 1945248571Smm /* must not be a snapshot */ 1946248571Smm if (dsl_dataset_is_snapshot(ds)) { 1947248571Smm dsl_dataset_rele(ds, FTAG); 1948249195Smm return (SET_ERROR(EINVAL)); 1949168676Spjd } 1950168676Spjd 1951248571Smm /* must have a most recent snapshot */ 1952248571Smm if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { 1953248571Smm dsl_dataset_rele(ds, FTAG); 1954249195Smm return (SET_ERROR(EINVAL)); 
1955248571Smm } 1956168676Spjd 1957263407Sdelphij /* must not have any bookmarks after the most recent snapshot */ 1958263407Sdelphij nvlist_t *proprequest = fnvlist_alloc(); 1959263407Sdelphij fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); 1960263407Sdelphij nvlist_t *bookmarks = fnvlist_alloc(); 1961263407Sdelphij error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks); 1962263407Sdelphij fnvlist_free(proprequest); 1963263407Sdelphij if (error != 0) 1964263407Sdelphij return (error); 1965263407Sdelphij for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL); 1966263407Sdelphij pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { 1967263407Sdelphij nvlist_t *valuenv = 1968263407Sdelphij fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), 1969263407Sdelphij zfs_prop_to_name(ZFS_PROP_CREATETXG)); 1970263407Sdelphij uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); 1971263407Sdelphij if (createtxg > ds->ds_phys->ds_prev_snap_txg) { 1972263407Sdelphij fnvlist_free(bookmarks); 1973263407Sdelphij dsl_dataset_rele(ds, FTAG); 1974263407Sdelphij return (SET_ERROR(EEXIST)); 1975263407Sdelphij } 1976263407Sdelphij } 1977263407Sdelphij fnvlist_free(bookmarks); 1978263407Sdelphij 1979253816Sdelphij error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 1980253816Sdelphij if (error != 0) { 1981248571Smm dsl_dataset_rele(ds, FTAG); 1982253816Sdelphij return (error); 1983248571Smm } 1984168676Spjd 1985248571Smm /* 1986248571Smm * Check if the snap we are rolling back to uses more than 1987248571Smm * the refquota. 
1988248571Smm */ 1989248571Smm if (ds->ds_quota != 0 && 1990248571Smm ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { 1991248571Smm dsl_dataset_rele(ds, FTAG); 1992249195Smm return (SET_ERROR(EDQUOT)); 1993168676Spjd } 1994168676Spjd 1995248571Smm /* 1996248571Smm * When we do the clone swap, we will temporarily use more space 1997248571Smm * due to the refreservation (the head will no longer have any 1998248571Smm * unique space, so the entire amount of the refreservation will need 1999248571Smm * to be free). We will immediately destroy the clone, freeing 2000248571Smm * this space, but the freeing happens over many txg's. 2001248571Smm */ 2002248571Smm unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 2003248571Smm ds->ds_phys->ds_unique_bytes); 2004168676Spjd 2005248571Smm if (unused_refres_delta > 0 && 2006248571Smm unused_refres_delta > 2007248571Smm dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 2008248571Smm dsl_dataset_rele(ds, FTAG); 2009249195Smm return (SET_ERROR(ENOSPC)); 2010248571Smm } 2011168676Spjd 2012248571Smm dsl_dataset_rele(ds, FTAG); 2013185029Spjd return (0); 2014185029Spjd} 2015185029Spjd 2016248571Smmstatic void 2017248571Smmdsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 2018168404Spjd{ 2019253816Sdelphij dsl_dataset_rollback_arg_t *ddra = arg; 2020248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2021248571Smm dsl_dataset_t *ds, *clone; 2022248571Smm uint64_t cloneobj; 2023254587Sdelphij char namebuf[ZFS_MAXNAMELEN]; 2024168404Spjd 2025253816Sdelphij VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 2026219089Spjd 2027254587Sdelphij dsl_dataset_name(ds->ds_prev, namebuf); 2028254587Sdelphij fnvlist_add_string(ddra->ddra_result, "target", namebuf); 2029254587Sdelphij 2030248571Smm cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 2031248571Smm ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 2032185029Spjd 2033248571Smm VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 2034185029Spjd 
2035248571Smm dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 2036248571Smm dsl_dataset_zero_zil(ds, tx); 2037219089Spjd 2038248571Smm dsl_destroy_head_sync_impl(clone, tx); 2039168404Spjd 2040248571Smm dsl_dataset_rele(clone, FTAG); 2041248571Smm dsl_dataset_rele(ds, FTAG); 2042248571Smm} 2043168404Spjd 2044253816Sdelphij/* 2045254587Sdelphij * Rolls back the given filesystem or volume to the most recent snapshot. 2046254587Sdelphij * The name of the most recent snapshot will be returned under key "target" 2047254587Sdelphij * in the result nvlist. 2048254587Sdelphij * 2049253816Sdelphij * If owner != NULL: 2050253816Sdelphij * - The existing dataset MUST be owned by the specified owner at entry 2051253816Sdelphij * - Upon return, dataset will still be held by the same owner, whether we 2052253816Sdelphij * succeed or not. 2053253816Sdelphij * 2054253816Sdelphij * This mode is required any time the existing filesystem is mounted. See 2055253816Sdelphij * notes above zfs_suspend_fs() for further details. 
2056253816Sdelphij */ 2057248571Smmint 2058254587Sdelphijdsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result) 2059248571Smm{ 2060253816Sdelphij dsl_dataset_rollback_arg_t ddra; 2061253816Sdelphij 2062253816Sdelphij ddra.ddra_fsname = fsname; 2063253816Sdelphij ddra.ddra_owner = owner; 2064254587Sdelphij ddra.ddra_result = result; 2065253816Sdelphij 2066248571Smm return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 2067269006Sdelphij dsl_dataset_rollback_sync, &ddra, 2068269006Sdelphij 1, ZFS_SPACE_CHECK_RESERVED)); 2069168404Spjd} 2070168404Spjd 2071185029Spjdstruct promotenode { 2072185029Spjd list_node_t link; 2073185029Spjd dsl_dataset_t *ds; 2074185029Spjd}; 2075185029Spjd 2076248571Smmtypedef struct dsl_dataset_promote_arg { 2077248571Smm const char *ddpa_clonename; 2078248571Smm dsl_dataset_t *ddpa_clone; 2079185029Spjd list_t shared_snaps, origin_snaps, clone_snaps; 2080248571Smm dsl_dataset_t *origin_origin; /* origin of the origin */ 2081185029Spjd uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2082219089Spjd char *err_ds; 2083265744Sdelphij cred_t *cr; 2084248571Smm} dsl_dataset_promote_arg_t; 2085168404Spjd 2086185029Spjdstatic int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2087248571Smmstatic int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 2088248571Smm void *tag); 2089248571Smmstatic void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 2090185029Spjd 2091168404Spjdstatic int 2092248571Smmdsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 2093168404Spjd{ 2094248571Smm dsl_dataset_promote_arg_t *ddpa = arg; 2095248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2096248571Smm dsl_dataset_t *hds; 2097248571Smm struct promotenode *snap; 2098248571Smm dsl_dataset_t *origin_ds; 2099168404Spjd int err; 2100219089Spjd uint64_t unused; 2101265744Sdelphij uint64_t ss_mv_cnt; 2102168404Spjd 2103248571Smm err = promote_hold(ddpa, dp, FTAG); 2104248571Smm if (err != 0) 2105248571Smm 
return (err); 2106168404Spjd 2107248571Smm hds = ddpa->ddpa_clone; 2108168404Spjd 2109248571Smm if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 2110248571Smm promote_rele(ddpa, FTAG); 2111249195Smm return (SET_ERROR(EXDEV)); 2112248571Smm } 2113168404Spjd 2114248571Smm /* 2115248571Smm * Compute and check the amount of space to transfer. Since this is 2116248571Smm * so expensive, don't do the preliminary check. 2117248571Smm */ 2118248571Smm if (!dmu_tx_is_syncing(tx)) { 2119248571Smm promote_rele(ddpa, FTAG); 2120248571Smm return (0); 2121248571Smm } 2122248571Smm 2123248571Smm snap = list_head(&ddpa->shared_snaps); 2124248571Smm origin_ds = snap->ds; 2125248571Smm 2126185029Spjd /* compute origin's new unique space */ 2127248571Smm snap = list_tail(&ddpa->clone_snaps); 2128185029Spjd ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2129219089Spjd dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2130219089Spjd origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2131248571Smm &ddpa->unique, &unused, &unused); 2132168404Spjd 2133185029Spjd /* 2134185029Spjd * Walk the snapshots that we are moving 2135185029Spjd * 2136185029Spjd * Compute space to transfer. Consider the incremental changes 2137248571Smm * to used by each snapshot: 2138185029Spjd * (my used) = (prev's used) + (blocks born) - (blocks killed) 2139185029Spjd * So each snapshot gave birth to: 2140185029Spjd * (blocks born) = (my used) - (prev's used) + (blocks killed) 2141185029Spjd * So a sequence would look like: 2142185029Spjd * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2143185029Spjd * Which simplifies to: 2144185029Spjd * uN + kN + kN-1 + ... + k1 + k0 2145185029Spjd * Note however, if we stop before we reach the ORIGIN we get: 2146185029Spjd * uN + kN + kN-1 + ... 
+ kM - uM-1 2147185029Spjd */ 2148265744Sdelphij ss_mv_cnt = 0; 2149248571Smm ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; 2150248571Smm ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2151248571Smm ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2152248571Smm for (snap = list_head(&ddpa->shared_snaps); snap; 2153248571Smm snap = list_next(&ddpa->shared_snaps, snap)) { 2154185029Spjd uint64_t val, dlused, dlcomp, dluncomp; 2155185029Spjd dsl_dataset_t *ds = snap->ds; 2156168404Spjd 2157265744Sdelphij ss_mv_cnt++; 2158265744Sdelphij 2159248571Smm /* 2160248571Smm * If there are long holds, we won't be able to evict 2161248571Smm * the objset. 2162248571Smm */ 2163248571Smm if (dsl_dataset_long_held(ds)) { 2164249195Smm err = SET_ERROR(EBUSY); 2165248571Smm goto out; 2166248571Smm } 2167248571Smm 2168185029Spjd /* Check that the snapshot name does not conflict */ 2169248571Smm VERIFY0(dsl_dataset_get_snapname(ds)); 2170185029Spjd err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2171219089Spjd if (err == 0) { 2172248571Smm (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 2173249195Smm err = SET_ERROR(EEXIST); 2174219089Spjd goto out; 2175219089Spjd } 2176185029Spjd if (err != ENOENT) 2177219089Spjd goto out; 2178168404Spjd 2179185029Spjd /* The very first snapshot does not have a deadlist */ 2180185029Spjd if (ds->ds_phys->ds_prev_snap_obj == 0) 2181185029Spjd continue; 2182185029Spjd 2183219089Spjd dsl_deadlist_space(&ds->ds_deadlist, 2184219089Spjd &dlused, &dlcomp, &dluncomp); 2185248571Smm ddpa->used += dlused; 2186248571Smm ddpa->comp += dlcomp; 2187248571Smm ddpa->uncomp += dluncomp; 2188168404Spjd } 2189168404Spjd 2190185029Spjd /* 2191185029Spjd * If we are a clone of a clone then we never reached ORIGIN, 2192185029Spjd * so we need to subtract out the clone origin's used space. 
2193185029Spjd */ 2194248571Smm if (ddpa->origin_origin) { 2195248571Smm ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; 2196248571Smm ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; 2197248571Smm ddpa->uncomp -= 2198248571Smm ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; 2199168404Spjd } 2200168404Spjd 2201265744Sdelphij /* Check that there is enough space and limit headroom here */ 2202185029Spjd err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2203265744Sdelphij 0, ss_mv_cnt, ddpa->used, ddpa->cr); 2204248571Smm if (err != 0) 2205248571Smm goto out; 2206168404Spjd 2207185029Spjd /* 2208185029Spjd * Compute the amounts of space that will be used by snapshots 2209185029Spjd * after the promotion (for both origin and clone). For each, 2210185029Spjd * it is the amount of space that will be on all of their 2211185029Spjd * deadlists (that was not born before their new origin). 2212185029Spjd */ 2213185029Spjd if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2214185029Spjd uint64_t space; 2215168404Spjd 2216168404Spjd /* 2217185029Spjd * Note, typically this will not be a clone of a clone, 2218219089Spjd * so dd_origin_txg will be < TXG_INITIAL, so 2219219089Spjd * these snaplist_space() -> dsl_deadlist_space_range() 2220185029Spjd * calls will be fast because they do not have to 2221185029Spjd * iterate over all bps. 
2222168404Spjd */ 2223248571Smm snap = list_head(&ddpa->origin_snaps); 2224248571Smm err = snaplist_space(&ddpa->shared_snaps, 2225248571Smm snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2226248571Smm if (err != 0) 2227248571Smm goto out; 2228168404Spjd 2229248571Smm err = snaplist_space(&ddpa->clone_snaps, 2230219089Spjd snap->ds->ds_dir->dd_origin_txg, &space); 2231248571Smm if (err != 0) 2232248571Smm goto out; 2233248571Smm ddpa->cloneusedsnap += space; 2234168404Spjd } 2235185029Spjd if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2236248571Smm err = snaplist_space(&ddpa->origin_snaps, 2237248571Smm origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); 2238248571Smm if (err != 0) 2239248571Smm goto out; 2240185029Spjd } 2241168404Spjd 2242219089Spjdout: 2243248571Smm promote_rele(ddpa, FTAG); 2244219089Spjd return (err); 2245168404Spjd} 2246168404Spjd 2247168404Spjdstatic void 2248248571Smmdsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2249168404Spjd{ 2250248571Smm dsl_dataset_promote_arg_t *ddpa = arg; 2251248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2252248571Smm dsl_dataset_t *hds; 2253248571Smm struct promotenode *snap; 2254248571Smm dsl_dataset_t *origin_ds; 2255185029Spjd dsl_dataset_t *origin_head; 2256248571Smm dsl_dir_t *dd; 2257185029Spjd dsl_dir_t *odd = NULL; 2258185029Spjd uint64_t oldnext_obj; 2259185029Spjd int64_t delta; 2260273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2261273162Ssmh char *oldname, *newname; 2262273162Ssmh#endif 2263168404Spjd 2264248571Smm VERIFY0(promote_hold(ddpa, dp, FTAG)); 2265248571Smm hds = ddpa->ddpa_clone; 2266168404Spjd 2267248571Smm ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); 2268248571Smm 2269248571Smm snap = list_head(&ddpa->shared_snaps); 2270248571Smm origin_ds = snap->ds; 2271248571Smm dd = hds->ds_dir; 2272248571Smm 2273248571Smm snap = list_head(&ddpa->origin_snaps); 2274185029Spjd origin_head = snap->ds; 2275185029Spjd 2276168404Spjd /* 
2277185029Spjd * We need to explicitly open odd, since origin_ds's dd will be 2278168404Spjd * changing. 2279168404Spjd */ 2280248571Smm VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2281185029Spjd NULL, FTAG, &odd)); 2282168404Spjd 2283185029Spjd /* change origin's next snap */ 2284185029Spjd dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2285185029Spjd oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2286248571Smm snap = list_tail(&ddpa->clone_snaps); 2287185029Spjd ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2288185029Spjd origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2289185029Spjd 2290185029Spjd /* change the origin's next clone */ 2291185029Spjd if (origin_ds->ds_phys->ds_next_clones_obj) { 2292248571Smm dsl_dataset_remove_from_next_clones(origin_ds, 2293248571Smm snap->ds->ds_object, tx); 2294248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2295185029Spjd origin_ds->ds_phys->ds_next_clones_obj, 2296185029Spjd oldnext_obj, tx)); 2297185029Spjd } 2298185029Spjd 2299185029Spjd /* change origin */ 2300185029Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 2301185029Spjd ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2302185029Spjd dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2303219089Spjd dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2304185029Spjd dmu_buf_will_dirty(odd->dd_dbuf, tx); 2305185029Spjd odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2306219089Spjd origin_head->ds_dir->dd_origin_txg = 2307219089Spjd origin_ds->ds_phys->ds_creation_txg; 2308185029Spjd 2309219089Spjd /* change dd_clone entries */ 2310219089Spjd if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2311248571Smm VERIFY0(zap_remove_int(dp->dp_meta_objset, 2312219089Spjd odd->dd_phys->dd_clones, hds->ds_object, tx)); 2313248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2314248571Smm ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2315219089Spjd hds->ds_object, tx)); 2316219089Spjd 
2317248571Smm VERIFY0(zap_remove_int(dp->dp_meta_objset, 2318248571Smm ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2319219089Spjd origin_head->ds_object, tx)); 2320219089Spjd if (dd->dd_phys->dd_clones == 0) { 2321219089Spjd dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2322219089Spjd DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2323219089Spjd } 2324248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2325219089Spjd dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2326219089Spjd } 2327219089Spjd 2328273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2329273162Ssmh /* Take the spa_namespace_lock early so zvol renames don't deadlock. */ 2330273162Ssmh mutex_enter(&spa_namespace_lock); 2331273162Ssmh 2332273162Ssmh oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2333273162Ssmh newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2334273162Ssmh#endif 2335273162Ssmh 2336168404Spjd /* move snapshots to this dir */ 2337248571Smm for (snap = list_head(&ddpa->shared_snaps); snap; 2338248571Smm snap = list_next(&ddpa->shared_snaps, snap)) { 2339185029Spjd dsl_dataset_t *ds = snap->ds; 2340168404Spjd 2341248571Smm /* 2342248571Smm * Property callbacks are registered to a particular 2343248571Smm * dsl_dir. Since ours is changing, evict the objset 2344248571Smm * so that they will be unregistered from the old dsl_dir. 
2345248571Smm */ 2346219089Spjd if (ds->ds_objset) { 2347219089Spjd dmu_objset_evict(ds->ds_objset); 2348219089Spjd ds->ds_objset = NULL; 2349185029Spjd } 2350248571Smm 2351168404Spjd /* move snap name entry */ 2352248571Smm VERIFY0(dsl_dataset_get_snapname(ds)); 2353248571Smm VERIFY0(dsl_dataset_snap_remove(origin_head, 2354265744Sdelphij ds->ds_snapname, tx, B_TRUE)); 2355248571Smm VERIFY0(zap_add(dp->dp_meta_objset, 2356168404Spjd hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2357168404Spjd 8, 1, &ds->ds_object, tx)); 2358265744Sdelphij dsl_fs_ss_count_adjust(hds->ds_dir, 1, 2359265744Sdelphij DD_FIELD_SNAPSHOT_COUNT, tx); 2360219089Spjd 2361168404Spjd /* change containing dsl_dir */ 2362168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 2363185029Spjd ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2364168404Spjd ds->ds_phys->ds_dir_obj = dd->dd_object; 2365185029Spjd ASSERT3P(ds->ds_dir, ==, odd); 2366248571Smm dsl_dir_rele(ds->ds_dir, ds); 2367248571Smm VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2368168404Spjd NULL, ds, &ds->ds_dir)); 2369168404Spjd 2370273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2371273162Ssmh dsl_dataset_name(ds, newname); 2372273162Ssmh zfsvfs_update_fromname(oldname, newname); 2373273162Ssmh zvol_rename_minors(oldname, newname); 2374273162Ssmh#endif 2375273162Ssmh 2376219089Spjd /* move any clone references */ 2377219089Spjd if (ds->ds_phys->ds_next_clones_obj && 2378219089Spjd spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2379219089Spjd zap_cursor_t zc; 2380219089Spjd zap_attribute_t za; 2381219089Spjd 2382219089Spjd for (zap_cursor_init(&zc, dp->dp_meta_objset, 2383219089Spjd ds->ds_phys->ds_next_clones_obj); 2384219089Spjd zap_cursor_retrieve(&zc, &za) == 0; 2385219089Spjd zap_cursor_advance(&zc)) { 2386219089Spjd dsl_dataset_t *cnds; 2387219089Spjd uint64_t o; 2388219089Spjd 2389219089Spjd if (za.za_first_integer == oldnext_obj) { 2390219089Spjd /* 2391219089Spjd * We've already moved the 2392219089Spjd * 
origin's reference. 2393219089Spjd */ 2394219089Spjd continue; 2395219089Spjd } 2396219089Spjd 2397248571Smm VERIFY0(dsl_dataset_hold_obj(dp, 2398219089Spjd za.za_first_integer, FTAG, &cnds)); 2399219089Spjd o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 2400219089Spjd 2401248571Smm VERIFY0(zap_remove_int(dp->dp_meta_objset, 2402248571Smm odd->dd_phys->dd_clones, o, tx)); 2403248571Smm VERIFY0(zap_add_int(dp->dp_meta_objset, 2404248571Smm dd->dd_phys->dd_clones, o, tx)); 2405219089Spjd dsl_dataset_rele(cnds, FTAG); 2406219089Spjd } 2407219089Spjd zap_cursor_fini(&zc); 2408219089Spjd } 2409219089Spjd 2410248571Smm ASSERT(!dsl_prop_hascb(ds)); 2411185029Spjd } 2412168404Spjd 2413273162Ssmh#if defined(__FreeBSD__) && defined(_KERNEL) 2414273162Ssmh mutex_exit(&spa_namespace_lock); 2415273162Ssmh 2416273162Ssmh kmem_free(newname, MAXPATHLEN); 2417273162Ssmh kmem_free(oldname, MAXPATHLEN); 2418273162Ssmh#endif 2419185029Spjd /* 2420185029Spjd * Change space accounting. 2421185029Spjd * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2422185029Spjd * both be valid, or both be 0 (resulting in delta == 0). This 2423185029Spjd * is true for each of {clone,origin} independently. 
2424185029Spjd */ 2425168404Spjd 2426248571Smm delta = ddpa->cloneusedsnap - 2427185029Spjd dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2428185029Spjd ASSERT3S(delta, >=, 0); 2429248571Smm ASSERT3U(ddpa->used, >=, delta); 2430185029Spjd dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2431185029Spjd dsl_dir_diduse_space(dd, DD_USED_HEAD, 2432248571Smm ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2433168404Spjd 2434248571Smm delta = ddpa->originusedsnap - 2435185029Spjd odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2436185029Spjd ASSERT3S(delta, <=, 0); 2437248571Smm ASSERT3U(ddpa->used, >=, -delta); 2438185029Spjd dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2439185029Spjd dsl_dir_diduse_space(odd, DD_USED_HEAD, 2440248571Smm -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2441185029Spjd 2442248571Smm origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; 2443185029Spjd 2444185029Spjd /* log history record */ 2445248571Smm spa_history_log_internal_ds(hds, "promote", tx, ""); 2446185029Spjd 2447248571Smm dsl_dir_rele(odd, FTAG); 2448248571Smm promote_rele(ddpa, FTAG); 2449185029Spjd} 2450185029Spjd 2451185029Spjd/* 2452185029Spjd * Make a list of dsl_dataset_t's for the snapshots between first_obj 2453185029Spjd * (exclusive) and last_obj (inclusive). The list will be in reverse 2454185029Spjd * order (last_obj will be the list_head()). If first_obj == 0, do all 2455185029Spjd * snapshots back to this dataset's origin. 
2456185029Spjd */ 2457185029Spjdstatic int 2458248571Smmsnaplist_make(dsl_pool_t *dp, 2459248571Smm uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2460185029Spjd{ 2461185029Spjd uint64_t obj = last_obj; 2462185029Spjd 2463185029Spjd list_create(l, sizeof (struct promotenode), 2464185029Spjd offsetof(struct promotenode, link)); 2465185029Spjd 2466185029Spjd while (obj != first_obj) { 2467185029Spjd dsl_dataset_t *ds; 2468185029Spjd struct promotenode *snap; 2469185029Spjd int err; 2470185029Spjd 2471248571Smm err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2472248571Smm ASSERT(err != ENOENT); 2473248571Smm if (err != 0) 2474185029Spjd return (err); 2475185029Spjd 2476185029Spjd if (first_obj == 0) 2477185029Spjd first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2478185029Spjd 2479248571Smm snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2480185029Spjd snap->ds = ds; 2481185029Spjd list_insert_tail(l, snap); 2482185029Spjd obj = ds->ds_phys->ds_prev_snap_obj; 2483168404Spjd } 2484168404Spjd 2485185029Spjd return (0); 2486185029Spjd} 2487168404Spjd 2488185029Spjdstatic int 2489185029Spjdsnaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2490185029Spjd{ 2491185029Spjd struct promotenode *snap; 2492168404Spjd 2493185029Spjd *spacep = 0; 2494185029Spjd for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2495219089Spjd uint64_t used, comp, uncomp; 2496219089Spjd dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2497219089Spjd mintxg, UINT64_MAX, &used, &comp, &uncomp); 2498185029Spjd *spacep += used; 2499185029Spjd } 2500185029Spjd return (0); 2501185029Spjd} 2502168404Spjd 2503185029Spjdstatic void 2504248571Smmsnaplist_destroy(list_t *l, void *tag) 2505185029Spjd{ 2506185029Spjd struct promotenode *snap; 2507185029Spjd 2508248571Smm if (l == NULL || !list_link_active(&l->list_head)) 2509185029Spjd return; 2510185029Spjd 2511185029Spjd while ((snap = list_tail(l)) != NULL) { 2512185029Spjd list_remove(l, snap); 2513248571Smm 
dsl_dataset_rele(snap->ds, tag); 2514248571Smm kmem_free(snap, sizeof (*snap)); 2515185029Spjd } 2516185029Spjd list_destroy(l); 2517168404Spjd} 2518168404Spjd 2519248571Smmstatic int 2520248571Smmpromote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2521168404Spjd{ 2522248571Smm int error; 2523185029Spjd dsl_dir_t *dd; 2524185029Spjd struct promotenode *snap; 2525168404Spjd 2526248571Smm error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2527248571Smm &ddpa->ddpa_clone); 2528248571Smm if (error != 0) 2529248571Smm return (error); 2530248571Smm dd = ddpa->ddpa_clone->ds_dir; 2531168404Spjd 2532248571Smm if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2533248571Smm !dsl_dir_is_clone(dd)) { 2534248571Smm dsl_dataset_rele(ddpa->ddpa_clone, tag); 2535249195Smm return (SET_ERROR(EINVAL)); 2536185029Spjd } 2537185029Spjd 2538248571Smm error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, 2539248571Smm &ddpa->shared_snaps, tag); 2540248571Smm if (error != 0) 2541185029Spjd goto out; 2542185029Spjd 2543248571Smm error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2544248571Smm &ddpa->clone_snaps, tag); 2545248571Smm if (error != 0) 2546185029Spjd goto out; 2547185029Spjd 2548248571Smm snap = list_head(&ddpa->shared_snaps); 2549185029Spjd ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2550248571Smm error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, 2551248571Smm snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, 2552248571Smm &ddpa->origin_snaps, tag); 2553248571Smm if (error != 0) 2554185029Spjd goto out; 2555185029Spjd 2556219089Spjd if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2557248571Smm error = dsl_dataset_hold_obj(dp, 2558185029Spjd snap->ds->ds_dir->dd_phys->dd_origin_obj, 2559248571Smm tag, &ddpa->origin_origin); 2560248571Smm if (error != 0) 2561185029Spjd goto out; 2562185029Spjd } 2563185029Spjdout: 2564248571Smm if (error != 0) 2565248571Smm promote_rele(ddpa, tag); 2566248571Smm return (error); 
2567248571Smm} 2568185029Spjd 2569248571Smmstatic void 2570248571Smmpromote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2571248571Smm{ 2572248571Smm snaplist_destroy(&ddpa->shared_snaps, tag); 2573248571Smm snaplist_destroy(&ddpa->clone_snaps, tag); 2574248571Smm snaplist_destroy(&ddpa->origin_snaps, tag); 2575248571Smm if (ddpa->origin_origin != NULL) 2576248571Smm dsl_dataset_rele(ddpa->origin_origin, tag); 2577248571Smm dsl_dataset_rele(ddpa->ddpa_clone, tag); 2578248571Smm} 2579248571Smm 2580248571Smm/* 2581248571Smm * Promote a clone. 2582248571Smm * 2583248571Smm * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2584248571Smm * in with the name. (It must be at least MAXNAMELEN bytes long.) 2585248571Smm */ 2586248571Smmint 2587248571Smmdsl_dataset_promote(const char *name, char *conflsnap) 2588248571Smm{ 2589248571Smm dsl_dataset_promote_arg_t ddpa = { 0 }; 2590248571Smm uint64_t numsnaps; 2591248571Smm int error; 2592248571Smm objset_t *os; 2593248571Smm 2594185029Spjd /* 2595248571Smm * We will modify space proportional to the number of 2596248571Smm * snapshots. Compute numsnaps. 
2597168404Spjd */ 2598248571Smm error = dmu_objset_hold(name, FTAG, &os); 2599248571Smm if (error != 0) 2600248571Smm return (error); 2601248571Smm error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2602248571Smm dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); 2603248571Smm dmu_objset_rele(os, FTAG); 2604248571Smm if (error != 0) 2605248571Smm return (error); 2606185029Spjd 2607248571Smm ddpa.ddpa_clonename = name; 2608248571Smm ddpa.err_ds = conflsnap; 2609265744Sdelphij ddpa.cr = CRED(); 2610248571Smm 2611248571Smm return (dsl_sync_task(name, dsl_dataset_promote_check, 2612269006Sdelphij dsl_dataset_promote_sync, &ddpa, 2613269006Sdelphij 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED)); 2614168404Spjd} 2615168404Spjd 2616248571Smmint 2617248571Smmdsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2618253816Sdelphij dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2619185029Spjd{ 2620248571Smm int64_t unused_refres_delta; 2621185029Spjd 2622185029Spjd /* they should both be heads */ 2623248571Smm if (dsl_dataset_is_snapshot(clone) || 2624248571Smm dsl_dataset_is_snapshot(origin_head)) 2625249195Smm return (SET_ERROR(EINVAL)); 2626185029Spjd 2627253820Sdelphij /* if we are not forcing, the branch point should be just before them */ 2628253820Sdelphij if (!force && clone->ds_prev != origin_head->ds_prev) 2629249195Smm return (SET_ERROR(EINVAL)); 2630185029Spjd 2631248571Smm /* clone should be the clone (unless they are unrelated) */ 2632248571Smm if (clone->ds_prev != NULL && 2633248571Smm clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2634253820Sdelphij origin_head->ds_dir != clone->ds_prev->ds_dir) 2635249195Smm return (SET_ERROR(EINVAL)); 2636185029Spjd 2637185029Spjd /* the clone should be a child of the origin */ 2638248571Smm if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2639249195Smm return (SET_ERROR(EINVAL)); 2640185029Spjd 2641248571Smm /* origin_head shouldn't be modified unless 'force' */ 
2642253820Sdelphij if (!force && 2643253820Sdelphij dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 2644249195Smm return (SET_ERROR(ETXTBSY)); 2645185029Spjd 2646248571Smm /* origin_head should have no long holds (e.g. is not mounted) */ 2647253816Sdelphij if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2648249195Smm return (SET_ERROR(EBUSY)); 2649185029Spjd 2650248571Smm /* check amount of any unconsumed refreservation */ 2651248571Smm unused_refres_delta = 2652248571Smm (int64_t)MIN(origin_head->ds_reserved, 2653248571Smm origin_head->ds_phys->ds_unique_bytes) - 2654248571Smm (int64_t)MIN(origin_head->ds_reserved, 2655248571Smm clone->ds_phys->ds_unique_bytes); 2656248571Smm 2657248571Smm if (unused_refres_delta > 0 && 2658248571Smm unused_refres_delta > 2659248571Smm dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2660249195Smm return (SET_ERROR(ENOSPC)); 2661185029Spjd 2662248571Smm /* clone can't be over the head's refquota */ 2663248571Smm if (origin_head->ds_quota != 0 && 2664248571Smm clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) 2665249195Smm return (SET_ERROR(EDQUOT)); 2666219089Spjd 2667185029Spjd return (0); 2668185029Spjd} 2669185029Spjd 2670248571Smmvoid 2671248571Smmdsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2672248571Smm dsl_dataset_t *origin_head, dmu_tx_t *tx) 2673185029Spjd{ 2674248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2675248571Smm int64_t unused_refres_delta; 2676185029Spjd 2677248571Smm ASSERT(clone->ds_reserved == 0); 2678248571Smm ASSERT(origin_head->ds_quota == 0 || 2679248571Smm clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); 2680253820Sdelphij ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 2681185029Spjd 2682248571Smm dmu_buf_will_dirty(clone->ds_dbuf, tx); 2683248571Smm dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2684185029Spjd 2685248571Smm if (clone->ds_objset != NULL) { 2686248571Smm dmu_objset_evict(clone->ds_objset); 2687248571Smm 
clone->ds_objset = NULL; 2688185029Spjd } 2689185029Spjd 2690248571Smm if (origin_head->ds_objset != NULL) { 2691248571Smm dmu_objset_evict(origin_head->ds_objset); 2692248571Smm origin_head->ds_objset = NULL; 2693185029Spjd } 2694185029Spjd 2695248571Smm unused_refres_delta = 2696248571Smm (int64_t)MIN(origin_head->ds_reserved, 2697248571Smm origin_head->ds_phys->ds_unique_bytes) - 2698248571Smm (int64_t)MIN(origin_head->ds_reserved, 2699248571Smm clone->ds_phys->ds_unique_bytes); 2700248571Smm 2701219089Spjd /* 2702219089Spjd * Reset origin's unique bytes, if it exists. 2703219089Spjd */ 2704248571Smm if (clone->ds_prev) { 2705248571Smm dsl_dataset_t *origin = clone->ds_prev; 2706219089Spjd uint64_t comp, uncomp; 2707185029Spjd 2708219089Spjd dmu_buf_will_dirty(origin->ds_dbuf, tx); 2709248571Smm dsl_deadlist_space_range(&clone->ds_deadlist, 2710219089Spjd origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2711219089Spjd &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 2712219089Spjd } 2713219089Spjd 2714185029Spjd /* swap blkptrs */ 2715185029Spjd { 2716185029Spjd blkptr_t tmp; 2717248571Smm tmp = origin_head->ds_phys->ds_bp; 2718248571Smm origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; 2719248571Smm clone->ds_phys->ds_bp = tmp; 2720185029Spjd } 2721185029Spjd 2722185029Spjd /* set dd_*_bytes */ 2723185029Spjd { 2724185029Spjd int64_t dused, dcomp, duncomp; 2725185029Spjd uint64_t cdl_used, cdl_comp, cdl_uncomp; 2726185029Spjd uint64_t odl_used, odl_comp, odl_uncomp; 2727185029Spjd 2728248571Smm ASSERT3U(clone->ds_dir->dd_phys-> 2729185029Spjd dd_used_breakdown[DD_USED_SNAP], ==, 0); 2730185029Spjd 2731248571Smm dsl_deadlist_space(&clone->ds_deadlist, 2732219089Spjd &cdl_used, &cdl_comp, &cdl_uncomp); 2733248571Smm dsl_deadlist_space(&origin_head->ds_deadlist, 2734219089Spjd &odl_used, &odl_comp, &odl_uncomp); 2735185029Spjd 2736248571Smm dused = clone->ds_phys->ds_referenced_bytes + cdl_used - 2737248571Smm (origin_head->ds_phys->ds_referenced_bytes + 
odl_used); 2738248571Smm dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - 2739248571Smm (origin_head->ds_phys->ds_compressed_bytes + odl_comp); 2740248571Smm duncomp = clone->ds_phys->ds_uncompressed_bytes + 2741185029Spjd cdl_uncomp - 2742248571Smm (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2743185029Spjd 2744248571Smm dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2745185029Spjd dused, dcomp, duncomp, tx); 2746248571Smm dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2747185029Spjd -dused, -dcomp, -duncomp, tx); 2748185029Spjd 2749185029Spjd /* 2750185029Spjd * The difference in the space used by snapshots is the 2751185029Spjd * difference in snapshot space due to the head's 2752185029Spjd * deadlist (since that's the only thing that's 2753185029Spjd * changing that affects the snapused). 2754185029Spjd */ 2755248571Smm dsl_deadlist_space_range(&clone->ds_deadlist, 2756248571Smm origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2757219089Spjd &cdl_used, &cdl_comp, &cdl_uncomp); 2758248571Smm dsl_deadlist_space_range(&origin_head->ds_deadlist, 2759248571Smm origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2760219089Spjd &odl_used, &odl_comp, &odl_uncomp); 2761248571Smm dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2762185029Spjd DD_USED_HEAD, DD_USED_SNAP, tx); 2763185029Spjd } 2764185029Spjd 2765185029Spjd /* swap ds_*_bytes */ 2766248571Smm SWITCH64(origin_head->ds_phys->ds_referenced_bytes, 2767248571Smm clone->ds_phys->ds_referenced_bytes); 2768248571Smm SWITCH64(origin_head->ds_phys->ds_compressed_bytes, 2769248571Smm clone->ds_phys->ds_compressed_bytes); 2770248571Smm SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, 2771248571Smm clone->ds_phys->ds_uncompressed_bytes); 2772248571Smm SWITCH64(origin_head->ds_phys->ds_unique_bytes, 2773248571Smm clone->ds_phys->ds_unique_bytes); 2774185029Spjd 2775185029Spjd /* apply any parent delta for change in unconsumed refreservation */ 2776248571Smm 
dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2777248571Smm unused_refres_delta, 0, 0, tx); 2778185029Spjd 2779219089Spjd /* 2780219089Spjd * Swap deadlists. 2781219089Spjd */ 2782248571Smm dsl_deadlist_close(&clone->ds_deadlist); 2783248571Smm dsl_deadlist_close(&origin_head->ds_deadlist); 2784248571Smm SWITCH64(origin_head->ds_phys->ds_deadlist_obj, 2785248571Smm clone->ds_phys->ds_deadlist_obj); 2786248571Smm dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2787248571Smm clone->ds_phys->ds_deadlist_obj); 2788248571Smm dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2789248571Smm origin_head->ds_phys->ds_deadlist_obj); 2790208047Smm 2791248571Smm dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2792185029Spjd 2793248571Smm spa_history_log_internal_ds(clone, "clone swap", tx, 2794248571Smm "parent=%s", origin_head->ds_dir->dd_myname); 2795185029Spjd} 2796185029Spjd 2797185029Spjd/* 2798168404Spjd * Given a pool name and a dataset object number in that pool, 2799168404Spjd * return the name of that dataset. 
2800168404Spjd */ 2801168404Spjdint 2802168404Spjddsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2803168404Spjd{ 2804168404Spjd dsl_pool_t *dp; 2805185029Spjd dsl_dataset_t *ds; 2806168404Spjd int error; 2807168404Spjd 2808248571Smm error = dsl_pool_hold(pname, FTAG, &dp); 2809248571Smm if (error != 0) 2810168404Spjd return (error); 2811248571Smm 2812248571Smm error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2813248571Smm if (error == 0) { 2814185029Spjd dsl_dataset_name(ds, buf); 2815185029Spjd dsl_dataset_rele(ds, FTAG); 2816168404Spjd } 2817248571Smm dsl_pool_rele(dp, FTAG); 2818168404Spjd 2819185029Spjd return (error); 2820185029Spjd} 2821185029Spjd 2822185029Spjdint 2823185029Spjddsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2824185029Spjd uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2825185029Spjd{ 2826185029Spjd int error = 0; 2827185029Spjd 2828185029Spjd ASSERT3S(asize, >, 0); 2829185029Spjd 2830185029Spjd /* 2831185029Spjd * *ref_rsrv is the portion of asize that will come from any 2832185029Spjd * unconsumed refreservation space. 2833185029Spjd */ 2834185029Spjd *ref_rsrv = 0; 2835185029Spjd 2836185029Spjd mutex_enter(&ds->ds_lock); 2837185029Spjd /* 2838185029Spjd * Make a space adjustment for reserved bytes. 
2839185029Spjd */ 2840185029Spjd if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2841185029Spjd ASSERT3U(*used, >=, 2842185029Spjd ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2843185029Spjd *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2844185029Spjd *ref_rsrv = 2845185029Spjd asize - MIN(asize, parent_delta(ds, asize + inflight)); 2846185029Spjd } 2847185029Spjd 2848185029Spjd if (!check_quota || ds->ds_quota == 0) { 2849185029Spjd mutex_exit(&ds->ds_lock); 2850185029Spjd return (0); 2851185029Spjd } 2852185029Spjd /* 2853185029Spjd * If they are requesting more space, and our current estimate 2854185029Spjd * is over quota, they get to try again unless the actual 2855185029Spjd * on-disk is over quota and there are no pending changes (which 2856185029Spjd * may free up space for us). 2857185029Spjd */ 2858236884Smm if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 2859236884Smm if (inflight > 0 || 2860236884Smm ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 2861249195Smm error = SET_ERROR(ERESTART); 2862185029Spjd else 2863249195Smm error = SET_ERROR(EDQUOT); 2864185029Spjd } 2865185029Spjd mutex_exit(&ds->ds_lock); 2866185029Spjd 2867185029Spjd return (error); 2868185029Spjd} 2869185029Spjd 2870248571Smmtypedef struct dsl_dataset_set_qr_arg { 2871248571Smm const char *ddsqra_name; 2872248571Smm zprop_source_t ddsqra_source; 2873248571Smm uint64_t ddsqra_value; 2874248571Smm} dsl_dataset_set_qr_arg_t; 2875248571Smm 2876248571Smm 2877185029Spjd/* ARGSUSED */ 2878185029Spjdstatic int 2879248571Smmdsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2880185029Spjd{ 2881248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 2882248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2883248571Smm dsl_dataset_t *ds; 2884248571Smm int error; 2885248571Smm uint64_t newval; 2886185029Spjd 2887248571Smm if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2888249195Smm return (SET_ERROR(ENOTSUP)); 2889185029Spjd 2890248571Smm error = 
dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2891248571Smm if (error != 0) 2892248571Smm return (error); 2893219089Spjd 2894248571Smm if (dsl_dataset_is_snapshot(ds)) { 2895248571Smm dsl_dataset_rele(ds, FTAG); 2896249195Smm return (SET_ERROR(EINVAL)); 2897248571Smm } 2898248571Smm 2899248571Smm error = dsl_prop_predict(ds->ds_dir, 2900248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2901248571Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2902248571Smm if (error != 0) { 2903248571Smm dsl_dataset_rele(ds, FTAG); 2904248571Smm return (error); 2905248571Smm } 2906248571Smm 2907248571Smm if (newval == 0) { 2908248571Smm dsl_dataset_rele(ds, FTAG); 2909185029Spjd return (0); 2910248571Smm } 2911185029Spjd 2912248571Smm if (newval < ds->ds_phys->ds_referenced_bytes || 2913248571Smm newval < ds->ds_reserved) { 2914248571Smm dsl_dataset_rele(ds, FTAG); 2915249195Smm return (SET_ERROR(ENOSPC)); 2916248571Smm } 2917185029Spjd 2918248571Smm dsl_dataset_rele(ds, FTAG); 2919168404Spjd return (0); 2920168404Spjd} 2921185029Spjd 2922248571Smmstatic void 2923248571Smmdsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 2924185029Spjd{ 2925248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 2926248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2927248571Smm dsl_dataset_t *ds; 2928248571Smm uint64_t newval; 2929185029Spjd 2930248571Smm VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2931185029Spjd 2932248571Smm dsl_prop_set_sync_impl(ds, 2933248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2934248571Smm ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 2935248571Smm &ddsqra->ddsqra_value, tx); 2936248571Smm 2937248571Smm VERIFY0(dsl_prop_get_int_ds(ds, 2938248571Smm zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 2939248571Smm 2940248571Smm if (ds->ds_quota != newval) { 2941219089Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 2942248571Smm ds->ds_quota = newval; 2943219089Spjd } 2944248571Smm dsl_dataset_rele(ds, FTAG); 2945185029Spjd} 2946185029Spjd 
2947185029Spjdint 2948248571Smmdsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 2949248571Smm uint64_t refquota) 2950185029Spjd{ 2951248571Smm dsl_dataset_set_qr_arg_t ddsqra; 2952185029Spjd 2953248571Smm ddsqra.ddsqra_name = dsname; 2954248571Smm ddsqra.ddsqra_source = source; 2955248571Smm ddsqra.ddsqra_value = refquota; 2956219089Spjd 2957248571Smm return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 2958269006Sdelphij dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE)); 2959185029Spjd} 2960185029Spjd 2961185029Spjdstatic int 2962248571Smmdsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 2963185029Spjd{ 2964248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 2965248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2966248571Smm dsl_dataset_t *ds; 2967248571Smm int error; 2968248571Smm uint64_t newval, unique; 2969185029Spjd 2970248571Smm if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 2971249195Smm return (SET_ERROR(ENOTSUP)); 2972185029Spjd 2973248571Smm error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2974248571Smm if (error != 0) 2975248571Smm return (error); 2976248571Smm 2977248571Smm if (dsl_dataset_is_snapshot(ds)) { 2978248571Smm dsl_dataset_rele(ds, FTAG); 2979249195Smm return (SET_ERROR(EINVAL)); 2980248571Smm } 2981185029Spjd 2982248571Smm error = dsl_prop_predict(ds->ds_dir, 2983248571Smm zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2984248571Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2985248571Smm if (error != 0) { 2986248571Smm dsl_dataset_rele(ds, FTAG); 2987248571Smm return (error); 2988248571Smm } 2989219089Spjd 2990185029Spjd /* 2991185029Spjd * If we are doing the preliminary check in open context, the 2992185029Spjd * space estimates may be inaccurate. 
2993185029Spjd */ 2994248571Smm if (!dmu_tx_is_syncing(tx)) { 2995248571Smm dsl_dataset_rele(ds, FTAG); 2996185029Spjd return (0); 2997248571Smm } 2998185029Spjd 2999185029Spjd mutex_enter(&ds->ds_lock); 3000219089Spjd if (!DS_UNIQUE_IS_ACCURATE(ds)) 3001219089Spjd dsl_dataset_recalc_head_uniq(ds); 3002219089Spjd unique = ds->ds_phys->ds_unique_bytes; 3003185029Spjd mutex_exit(&ds->ds_lock); 3004185029Spjd 3005248571Smm if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 3006248571Smm uint64_t delta = MAX(unique, newval) - 3007209962Smm MAX(unique, ds->ds_reserved); 3008185029Spjd 3009248571Smm if (delta > 3010248571Smm dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 3011248571Smm (ds->ds_quota > 0 && newval > ds->ds_quota)) { 3012248571Smm dsl_dataset_rele(ds, FTAG); 3013249195Smm return (SET_ERROR(ENOSPC)); 3014248571Smm } 3015209962Smm } 3016209962Smm 3017248571Smm dsl_dataset_rele(ds, FTAG); 3018185029Spjd return (0); 3019185029Spjd} 3020185029Spjd 3021248571Smmvoid 3022248571Smmdsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 3023248571Smm zprop_source_t source, uint64_t value, dmu_tx_t *tx) 3024185029Spjd{ 3025248571Smm uint64_t newval; 3026185029Spjd uint64_t unique; 3027185029Spjd int64_t delta; 3028185029Spjd 3029248571Smm dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3030248571Smm source, sizeof (value), 1, &value, tx); 3031219089Spjd 3032248571Smm VERIFY0(dsl_prop_get_int_ds(ds, 3033248571Smm zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 3034248571Smm 3035185029Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 3036185029Spjd mutex_enter(&ds->ds_dir->dd_lock); 3037185029Spjd mutex_enter(&ds->ds_lock); 3038219089Spjd ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3039219089Spjd unique = ds->ds_phys->ds_unique_bytes; 3040248571Smm delta = MAX(0, (int64_t)(newval - unique)) - 3041185029Spjd MAX(0, (int64_t)(ds->ds_reserved - unique)); 3042248571Smm ds->ds_reserved = newval; 3043185029Spjd mutex_exit(&ds->ds_lock); 
3044185029Spjd 3045185029Spjd dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3046185029Spjd mutex_exit(&ds->ds_dir->dd_lock); 3047185029Spjd} 3048185029Spjd 3049248571Smmstatic void 3050248571Smmdsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 3051185029Spjd{ 3052248571Smm dsl_dataset_set_qr_arg_t *ddsqra = arg; 3053248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 3054185029Spjd dsl_dataset_t *ds; 3055185029Spjd 3056248571Smm VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3057248571Smm dsl_dataset_set_refreservation_sync_impl(ds, 3058248571Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); 3059185029Spjd dsl_dataset_rele(ds, FTAG); 3060185029Spjd} 3061219089Spjd 3062219089Spjdint 3063248571Smmdsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 3064248571Smm uint64_t refreservation) 3065219089Spjd{ 3066248571Smm dsl_dataset_set_qr_arg_t ddsqra; 3067219089Spjd 3068248571Smm ddsqra.ddsqra_name = dsname; 3069248571Smm ddsqra.ddsqra_source = source; 3070248571Smm ddsqra.ddsqra_value = refreservation; 3071219089Spjd 3072248571Smm return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 3073269006Sdelphij dsl_dataset_set_refreservation_sync, &ddsqra, 3074269006Sdelphij 0, ZFS_SPACE_CHECK_NONE)); 3075219089Spjd} 3076219089Spjd 3077219089Spjd/* 3078228103Smm * Return (in *usedp) the amount of space written in new that is not 3079228103Smm * present in oldsnap. New may be a snapshot or the head. Old must be 3080228103Smm * a snapshot before new, in new's filesystem (or its origin). If not then 3081228103Smm * fail and return EINVAL. 3082228103Smm * 3083228103Smm * The written space is calculated by considering two components: First, we 3084228103Smm * ignore any freed space, and calculate the written as new's used space 3085228103Smm * minus old's used space. 
Next, we add in the amount of space that was freed 3086228103Smm * between the two snapshots, thus reducing new's used space relative to old's. 3087228103Smm * Specifically, this is the space that was born before old->ds_creation_txg, 3088228103Smm * and freed before new (ie. on new's deadlist or a previous deadlist). 3089228103Smm * 3090228103Smm * space freed [---------------------] 3091228103Smm * snapshots ---O-------O--------O-------O------ 3092228103Smm * oldsnap new 3093228103Smm */ 3094228103Smmint 3095228103Smmdsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 3096228103Smm uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3097228103Smm{ 3098228103Smm int err = 0; 3099228103Smm uint64_t snapobj; 3100228103Smm dsl_pool_t *dp = new->ds_dir->dd_pool; 3101228103Smm 3102248571Smm ASSERT(dsl_pool_config_held(dp)); 3103248571Smm 3104228103Smm *usedp = 0; 3105236884Smm *usedp += new->ds_phys->ds_referenced_bytes; 3106236884Smm *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 3107228103Smm 3108228103Smm *compp = 0; 3109228103Smm *compp += new->ds_phys->ds_compressed_bytes; 3110228103Smm *compp -= oldsnap->ds_phys->ds_compressed_bytes; 3111228103Smm 3112228103Smm *uncompp = 0; 3113228103Smm *uncompp += new->ds_phys->ds_uncompressed_bytes; 3114228103Smm *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 3115228103Smm 3116228103Smm snapobj = new->ds_object; 3117228103Smm while (snapobj != oldsnap->ds_object) { 3118228103Smm dsl_dataset_t *snap; 3119228103Smm uint64_t used, comp, uncomp; 3120228103Smm 3121236884Smm if (snapobj == new->ds_object) { 3122236884Smm snap = new; 3123236884Smm } else { 3124236884Smm err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 3125236884Smm if (err != 0) 3126236884Smm break; 3127236884Smm } 3128228103Smm 3129228103Smm if (snap->ds_phys->ds_prev_snap_txg == 3130228103Smm oldsnap->ds_phys->ds_creation_txg) { 3131228103Smm /* 3132228103Smm * The blocks in the deadlist can not be born after 3133228103Smm * 
ds_prev_snap_txg, so get the whole deadlist space, 3134228103Smm * which is more efficient (especially for old-format 3135228103Smm * deadlists). Unfortunately the deadlist code 3136228103Smm * doesn't have enough information to make this 3137228103Smm * optimization itself. 3138228103Smm */ 3139228103Smm dsl_deadlist_space(&snap->ds_deadlist, 3140228103Smm &used, &comp, &uncomp); 3141228103Smm } else { 3142228103Smm dsl_deadlist_space_range(&snap->ds_deadlist, 3143228103Smm 0, oldsnap->ds_phys->ds_creation_txg, 3144228103Smm &used, &comp, &uncomp); 3145228103Smm } 3146228103Smm *usedp += used; 3147228103Smm *compp += comp; 3148228103Smm *uncompp += uncomp; 3149228103Smm 3150228103Smm /* 3151228103Smm * If we get to the beginning of the chain of snapshots 3152228103Smm * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 3153228103Smm * was not a snapshot of/before new. 3154228103Smm */ 3155228103Smm snapobj = snap->ds_phys->ds_prev_snap_obj; 3156236884Smm if (snap != new) 3157236884Smm dsl_dataset_rele(snap, FTAG); 3158228103Smm if (snapobj == 0) { 3159249195Smm err = SET_ERROR(EINVAL); 3160228103Smm break; 3161228103Smm } 3162228103Smm 3163228103Smm } 3164228103Smm return (err); 3165228103Smm} 3166228103Smm 3167228103Smm/* 3168228103Smm * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 3169228103Smm * lastsnap, and all snapshots in between are deleted. 3170228103Smm * 3171228103Smm * blocks that would be freed [---------------------------] 3172228103Smm * snapshots ---O-------O--------O-------O--------O 3173228103Smm * firstsnap lastsnap 3174228103Smm * 3175228103Smm * This is the set of blocks that were born after the snap before firstsnap, 3176228103Smm * (birth > firstsnap->prev_snap_txg) and died before the snap after the 3177228103Smm * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 
3178228103Smm * We calculate this by iterating over the relevant deadlists (from the snap 3179228103Smm * after lastsnap, backward to the snap after firstsnap), summing up the 3180228103Smm * space on the deadlist that was born after the snap before firstsnap. 3181228103Smm */ 3182228103Smmint 3183228103Smmdsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 3184228103Smm dsl_dataset_t *lastsnap, 3185228103Smm uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3186228103Smm{ 3187228103Smm int err = 0; 3188228103Smm uint64_t snapobj; 3189228103Smm dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3190228103Smm 3191228103Smm ASSERT(dsl_dataset_is_snapshot(firstsnap)); 3192228103Smm ASSERT(dsl_dataset_is_snapshot(lastsnap)); 3193228103Smm 3194228103Smm /* 3195228103Smm * Check that the snapshots are in the same dsl_dir, and firstsnap 3196228103Smm * is before lastsnap. 3197228103Smm */ 3198228103Smm if (firstsnap->ds_dir != lastsnap->ds_dir || 3199228103Smm firstsnap->ds_phys->ds_creation_txg > 3200228103Smm lastsnap->ds_phys->ds_creation_txg) 3201249195Smm return (SET_ERROR(EINVAL)); 3202228103Smm 3203228103Smm *usedp = *compp = *uncompp = 0; 3204228103Smm 3205228103Smm snapobj = lastsnap->ds_phys->ds_next_snap_obj; 3206228103Smm while (snapobj != firstsnap->ds_object) { 3207228103Smm dsl_dataset_t *ds; 3208228103Smm uint64_t used, comp, uncomp; 3209228103Smm 3210228103Smm err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3211228103Smm if (err != 0) 3212228103Smm break; 3213228103Smm 3214228103Smm dsl_deadlist_space_range(&ds->ds_deadlist, 3215228103Smm firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 3216228103Smm &used, &comp, &uncomp); 3217228103Smm *usedp += used; 3218228103Smm *compp += comp; 3219228103Smm *uncompp += uncomp; 3220228103Smm 3221228103Smm snapobj = ds->ds_phys->ds_prev_snap_obj; 3222228103Smm ASSERT3U(snapobj, !=, 0); 3223228103Smm dsl_dataset_rele(ds, FTAG); 3224228103Smm } 3225228103Smm return (err); 3226228103Smm} 3227248571Smm 3228248571Smm/* 
3229248571Smm * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3230248571Smm * For example, they could both be snapshots of the same filesystem, and 3231248571Smm * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3232248571Smm * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3233248571Smm * filesystem. Or 'earlier' could be the origin's origin. 3234263407Sdelphij * 3235263407Sdelphij * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 3236248571Smm */ 3237248571Smmboolean_t 3238263407Sdelphijdsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 3239263407Sdelphij uint64_t earlier_txg) 3240248571Smm{ 3241248571Smm dsl_pool_t *dp = later->ds_dir->dd_pool; 3242248571Smm int error; 3243248571Smm boolean_t ret; 3244248571Smm 3245248571Smm ASSERT(dsl_pool_config_held(dp)); 3246263407Sdelphij ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); 3247248571Smm 3248263407Sdelphij if (earlier_txg == 0) 3249263407Sdelphij earlier_txg = earlier->ds_phys->ds_creation_txg; 3250263407Sdelphij 3251263407Sdelphij if (dsl_dataset_is_snapshot(later) && 3252263407Sdelphij earlier_txg >= later->ds_phys->ds_creation_txg) 3253248571Smm return (B_FALSE); 3254248571Smm 3255248571Smm if (later->ds_dir == earlier->ds_dir) 3256248571Smm return (B_TRUE); 3257248571Smm if (!dsl_dir_is_clone(later->ds_dir)) 3258248571Smm return (B_FALSE); 3259248571Smm 3260248571Smm if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) 3261248571Smm return (B_TRUE); 3262248571Smm dsl_dataset_t *origin; 3263248571Smm error = dsl_dataset_hold_obj(dp, 3264248571Smm later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); 3265248571Smm if (error != 0) 3266248571Smm return (B_FALSE); 3267263407Sdelphij ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 3268248571Smm dsl_dataset_rele(origin, FTAG); 3269248571Smm return (ret); 3270248571Smm} 3271263390Sdelphij 3272263390Sdelphij 
3273263390Sdelphijvoid 3274263390Sdelphijdsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 3275263390Sdelphij{ 3276263390Sdelphij objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3277263390Sdelphij dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 3278263390Sdelphij} 3279