dsl_pool.c revision 209962
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#include <sys/dsl_pool.h> 27#include <sys/dsl_dataset.h> 28#include <sys/dsl_dir.h> 29#include <sys/dsl_synctask.h> 30#include <sys/dmu_tx.h> 31#include <sys/dmu_objset.h> 32#include <sys/arc.h> 33#include <sys/zap.h> 34#include <sys/zio.h> 35#include <sys/zfs_context.h> 36#include <sys/fs/zfs.h> 37#include <sys/zfs_znode.h> 38#include <sys/spa_impl.h> 39 40int zfs_no_write_throttle = 0; 41int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ 42int zfs_txg_synctime = 5; /* target secs to sync a txg */ 43 44uint64_t zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */ 45uint64_t zfs_write_limit_max = 0; /* max data payload per txg */ 46uint64_t zfs_write_limit_inflated = 0; 47uint64_t zfs_write_limit_override = 0; 48extern uint64_t zfs_write_limit_min; 49 50kmutex_t zfs_write_limit_lock; 51 52static pgcnt_t old_physmem = 0; 53 54static int 55dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) 56{ 57 uint64_t obj; 58 int err; 59 60 err = zap_lookup(dp->dp_meta_objset, 61 dp->dp_root_dir->dd_phys->dd_child_dir_zapobj, 62 name, sizeof (obj), 1, &obj); 63 if (err) 64 return (err); 65 66 return (dsl_dir_open_obj(dp, obj, name, dp, ddp)); 67} 68 69static dsl_pool_t * 70dsl_pool_open_impl(spa_t *spa, uint64_t txg) 71{ 72 dsl_pool_t *dp; 73 blkptr_t *bp = spa_get_rootblkptr(spa); 74 75 dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); 76 dp->dp_spa = spa; 77 dp->dp_meta_rootbp = *bp; 78 rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); 79 dp->dp_write_limit = zfs_write_limit_min; 80 txg_init(dp, txg); 81 82 txg_list_create(&dp->dp_dirty_datasets, 83 offsetof(dsl_dataset_t, ds_dirty_link)); 84 txg_list_create(&dp->dp_dirty_dirs, 85 offsetof(dsl_dir_t, dd_dirty_link)); 86 txg_list_create(&dp->dp_sync_tasks, 87 offsetof(dsl_sync_task_group_t, dstg_node)); 88 list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), 89 offsetof(dsl_dataset_t, ds_synced_link)); 90 91 mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); 92 mutex_init(&dp->dp_scrub_cancel_lock, NULL, MUTEX_DEFAULT, NULL); 93 94 dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, 95 1, 4, 0); 96 97 return (dp); 98} 99 100int 101dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) 102{ 103 int err; 104 dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 105 dsl_dir_t *dd; 106 dsl_dataset_t *ds; 107 objset_impl_t *osi; 108 109 rw_enter(&dp->dp_config_rwlock, RW_WRITER); 110 err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi); 111 if (err) 112 goto out; 113 dp->dp_meta_objset = &osi->os; 114 115 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 116 DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, 117 &dp->dp_root_dir_obj); 118 if (err) 119 goto out; 120 121 err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, 122 NULL, dp, &dp->dp_root_dir); 123 if (err) 124 goto out; 125 126 err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); 127 if (err) 128 goto out; 129 130 if (spa_version(spa) >= SPA_VERSION_ORIGIN) { 131 err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); 132 if (err) 133 goto out; 134 err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, 135 FTAG, &ds); 136 if (err == 0) { 137 err = dsl_dataset_hold_obj(dp, 138 ds->ds_phys->ds_prev_snap_obj, dp, 139 &dp->dp_origin_snap); 140 dsl_dataset_rele(ds, FTAG); 141 } 142 dsl_dir_close(dd, dp); 143 if (err) 144 goto out; 145 } 146 147 /* get scrub status */ 148 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 149 DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1, 150 &dp->dp_scrub_func); 151 if (err == 0) { 152 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 153 DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1, 154 &dp->dp_scrub_queue_obj); 155 if (err) 156 goto out; 157 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 158 DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1, 159 &dp->dp_scrub_min_txg); 160 if (err) 161 goto out; 162 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 163 DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1, 164 &dp->dp_scrub_max_txg); 165 if (err) 166 goto out; 167 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 168 DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t), 4, 169 &dp->dp_scrub_bookmark); 170 if (err) 171 goto out; 172 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 173 DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1, 174 &spa->spa_scrub_errors); 175 if (err) 176 goto out; 177 if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) { 178 /* 179 * A new-type scrub was in progress on an old 180 * pool. Restart from the beginning, since the 181 * old software may have changed the pool in the 182 * meantime. 183 */ 184 dsl_pool_scrub_restart(dp); 185 } 186 } else { 187 /* 188 * It's OK if there is no scrub in progress (and if 189 * there was an I/O error, ignore it). 190 */ 191 err = 0; 192 } 193 194out: 195 rw_exit(&dp->dp_config_rwlock); 196 if (err) 197 dsl_pool_close(dp); 198 else 199 *dpp = dp; 200 201 return (err); 202} 203 204void 205dsl_pool_close(dsl_pool_t *dp) 206{ 207 /* drop our references from dsl_pool_open() */ 208 209 /* 210 * Since we held the origin_snap from "syncing" context (which 211 * includes pool-opening context), it actually only got a "ref" 212 * and not a hold, so just drop that here. 213 */ 214 if (dp->dp_origin_snap) 215 dsl_dataset_drop_ref(dp->dp_origin_snap, dp); 216 if (dp->dp_mos_dir) 217 dsl_dir_close(dp->dp_mos_dir, dp); 218 if (dp->dp_root_dir) 219 dsl_dir_close(dp->dp_root_dir, dp); 220 221 /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ 222 if (dp->dp_meta_objset) 223 dmu_objset_evict(NULL, dp->dp_meta_objset->os); 224 225 txg_list_destroy(&dp->dp_dirty_datasets); 226 txg_list_destroy(&dp->dp_dirty_dirs); 227 txg_list_destroy(&dp->dp_sync_tasks); 228 list_destroy(&dp->dp_synced_datasets); 229 230 arc_flush(dp->dp_spa); 231 txg_fini(dp); 232 rw_destroy(&dp->dp_config_rwlock); 233 mutex_destroy(&dp->dp_lock); 234 mutex_destroy(&dp->dp_scrub_cancel_lock); 235 taskq_destroy(dp->dp_vnrele_taskq); 236 if (dp->dp_blkstats) 237 kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); 238 kmem_free(dp, sizeof (dsl_pool_t)); 239} 240 241dsl_pool_t * 242dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) 243{ 244 int err; 245 dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 246 dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); 247 objset_impl_t *osip; 248 dsl_dataset_t *ds; 249 uint64_t dsobj; 250 251 /* create and open the MOS (meta-objset) */ 252 dp->dp_meta_objset = &dmu_objset_create_impl(spa, 253 NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os; 254 255 /* create the pool directory */ 256 err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 257 DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); 258 ASSERT3U(err, ==, 0); 259 260 /* create and open the root dir */ 261 dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); 262 VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, 263 NULL, dp, &dp->dp_root_dir)); 264 265 /* create and open the meta-objset dir */ 266 (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); 267 VERIFY(0 == dsl_pool_open_special_dir(dp, 268 MOS_DIR_NAME, &dp->dp_mos_dir)); 269 270 if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) 271 dsl_pool_create_origin(dp, tx); 272 273 /* create the root dataset */ 274 dsobj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); 275 276 /* create the root objset */ 277 VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 278 osip = dmu_objset_create_impl(dp->dp_spa, ds, 279 dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); 280#ifdef _KERNEL 281 zfs_create_fs(&osip->os, kcred, zplprops, tx); 282#endif 283 dsl_dataset_rele(ds, FTAG); 284 285 dmu_tx_commit(tx); 286 287 return (dp); 288} 289 290void 291dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) 292{ 293 zio_t *zio; 294 dmu_tx_t *tx; 295 dsl_dir_t *dd; 296 dsl_dataset_t *ds; 297 dsl_sync_task_group_t *dstg; 298 objset_impl_t *mosi = dp->dp_meta_objset->os; 299 hrtime_t start, write_time; 300 uint64_t data_written; 301 int err; 302 303 tx = dmu_tx_create_assigned(dp, txg); 304 305 dp->dp_read_overhead = 0; 306 start = gethrtime(); 307 308 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 309 while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { 310 /* 311 * We must not sync any non-MOS datasets twice, because 312 * we may have taken a snapshot of them. However, we 313 * may sync newly-created datasets on pass 2. 314 */ 315 ASSERT(!list_link_active(&ds->ds_synced_link)); 316 list_insert_tail(&dp->dp_synced_datasets, ds); 317 dsl_dataset_sync(ds, zio, tx); 318 } 319 DTRACE_PROBE(pool_sync__1setup); 320 err = zio_wait(zio); 321 322 write_time = gethrtime() - start; 323 ASSERT(err == 0); 324 DTRACE_PROBE(pool_sync__2rootzio); 325 326 for (ds = list_head(&dp->dp_synced_datasets); ds; 327 ds = list_next(&dp->dp_synced_datasets, ds)) 328 dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx); 329 330 /* 331 * Sync the datasets again to push out the changes due to 332 * userquota updates. This must be done before we process the 333 * sync tasks, because that could cause a snapshot of a dataset 334 * whose ds_bp will be rewritten when we do this 2nd sync. 335 */ 336 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 337 while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { 338 ASSERT(list_link_active(&ds->ds_synced_link)); 339 dmu_buf_rele(ds->ds_dbuf, ds); 340 dsl_dataset_sync(ds, zio, tx); 341 } 342 err = zio_wait(zio); 343 344 while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) { 345 /* 346 * No more sync tasks should have been added while we 347 * were syncing. 348 */ 349 ASSERT(spa_sync_pass(dp->dp_spa) == 1); 350 dsl_sync_task_group_sync(dstg, tx); 351 } 352 DTRACE_PROBE(pool_sync__3task); 353 354 start = gethrtime(); 355 while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) 356 dsl_dir_sync(dd, tx); 357 write_time += gethrtime() - start; 358 359 if (spa_sync_pass(dp->dp_spa) == 1) 360 dsl_pool_scrub_sync(dp, tx); 361 362 start = gethrtime(); 363 if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL || 364 list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) { 365 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 366 dmu_objset_sync(mosi, zio, tx); 367 err = zio_wait(zio); 368 ASSERT(err == 0); 369 dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); 370 spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); 371 } 372 write_time += gethrtime() - start; 373 DTRACE_PROBE2(pool_sync__4io, hrtime_t, write_time, 374 hrtime_t, dp->dp_read_overhead); 375 write_time -= dp->dp_read_overhead; 376 377 dmu_tx_commit(tx); 378 379 data_written = dp->dp_space_towrite[txg & TXG_MASK]; 380 dp->dp_space_towrite[txg & TXG_MASK] = 0; 381 ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0); 382 383 /* 384 * If the write limit max has not been explicitly set, set it 385 * to a fraction of available physical memory (default 1/8th). 386 * Note that we must inflate the limit because the spa 387 * inflates write sizes to account for data replication. 388 * Check this each sync phase to catch changing memory size. 389 */ 390 if (physmem != old_physmem && zfs_write_limit_shift) { 391 mutex_enter(&zfs_write_limit_lock); 392 old_physmem = physmem; 393 zfs_write_limit_max = ptob(physmem) >> zfs_write_limit_shift; 394 zfs_write_limit_inflated = MAX(zfs_write_limit_min, 395 spa_get_asize(dp->dp_spa, zfs_write_limit_max)); 396 mutex_exit(&zfs_write_limit_lock); 397 } 398 399 /* 400 * Attempt to keep the sync time consistent by adjusting the 401 * amount of write traffic allowed into each transaction group. 402 * Weight the throughput calculation towards the current value: 403 * thru = 3/4 old_thru + 1/4 new_thru 404 */ 405 ASSERT(zfs_write_limit_min > 0); 406 if (data_written > zfs_write_limit_min / 8 && write_time > 0) { 407 uint64_t throughput = (data_written * NANOSEC) / write_time; 408 if (dp->dp_throughput) 409 dp->dp_throughput = throughput / 4 + 410 3 * dp->dp_throughput / 4; 411 else 412 dp->dp_throughput = throughput; 413 dp->dp_write_limit = MIN(zfs_write_limit_inflated, 414 MAX(zfs_write_limit_min, 415 dp->dp_throughput * zfs_txg_synctime)); 416 } 417} 418 419void 420dsl_pool_zil_clean(dsl_pool_t *dp) 421{ 422 dsl_dataset_t *ds; 423 424 while (ds = list_head(&dp->dp_synced_datasets)) { 425 list_remove(&dp->dp_synced_datasets, ds); 426 ASSERT(ds->ds_user_ptr != NULL); 427 zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil); 428 dmu_buf_rele(ds->ds_dbuf, ds); 429 } 430} 431 432/* 433 * TRUE if the current thread is the tx_sync_thread or if we 434 * are being called from SPA context during pool initialization. 435 */ 436int 437dsl_pool_sync_context(dsl_pool_t *dp) 438{ 439 return (curthread == dp->dp_tx.tx_sync_thread || 440 spa_get_dsl(dp->dp_spa) == NULL); 441} 442 443uint64_t 444dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) 445{ 446 uint64_t space, resv; 447 448 /* 449 * Reserve about 1.6% (1/64), or at least 32MB, for allocation 450 * efficiency. 451 * XXX The intent log is not accounted for, so it must fit 452 * within this slop. 453 * 454 * If we're trying to assess whether it's OK to do a free, 455 * cut the reservation in half to allow forward progress 456 * (e.g. make it possible to rm(1) files from a full pool). 457 */ 458 space = spa_get_dspace(dp->dp_spa); 459 resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1); 460 if (netfree) 461 resv >>= 1; 462 463 return (space - resv); 464} 465 466int 467dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx) 468{ 469 uint64_t reserved = 0; 470 uint64_t write_limit = (zfs_write_limit_override ? 471 zfs_write_limit_override : dp->dp_write_limit); 472 473 if (zfs_no_write_throttle) { 474 atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], 475 space); 476 return (0); 477 } 478 479 /* 480 * Check to see if we have exceeded the maximum allowed IO for 481 * this transaction group. We can do this without locks since 482 * a little slop here is ok. Note that we do the reserved check 483 * with only half the requested reserve: this is because the 484 * reserve requests are worst-case, and we really don't want to 485 * throttle based off of worst-case estimates. 486 */ 487 if (write_limit > 0) { 488 reserved = dp->dp_space_towrite[tx->tx_txg & TXG_MASK] 489 + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2; 490 491 if (reserved && reserved > write_limit) 492 return (ERESTART); 493 } 494 495 atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space); 496 497 /* 498 * If this transaction group is over 7/8ths capacity, delay 499 * the caller 1 clock tick. This will slow down the "fill" 500 * rate until the sync process can catch up with us. 501 */ 502 if (reserved && reserved > (write_limit - (write_limit >> 3))) 503 txg_delay(dp, tx->tx_txg, 1); 504 505 return (0); 506} 507 508void 509dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) 510{ 511 ASSERT(dp->dp_tempreserved[tx->tx_txg & TXG_MASK] >= space); 512 atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], -space); 513} 514 515void 516dsl_pool_memory_pressure(dsl_pool_t *dp) 517{ 518 uint64_t space_inuse = 0; 519 int i; 520 521 if (dp->dp_write_limit == zfs_write_limit_min) 522 return; 523 524 for (i = 0; i < TXG_SIZE; i++) { 525 space_inuse += dp->dp_space_towrite[i]; 526 space_inuse += dp->dp_tempreserved[i]; 527 } 528 dp->dp_write_limit = MAX(zfs_write_limit_min, 529 MIN(dp->dp_write_limit, space_inuse / 4)); 530} 531 532void 533dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) 534{ 535 if (space > 0) { 536 mutex_enter(&dp->dp_lock); 537 dp->dp_space_towrite[tx->tx_txg & TXG_MASK] += space; 538 mutex_exit(&dp->dp_lock); 539 } 540} 541 542/* ARGSUSED */ 543static int 544upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) 545{ 546 dmu_tx_t *tx = arg; 547 dsl_dataset_t *ds, *prev = NULL; 548 int err; 549 dsl_pool_t *dp = spa_get_dsl(spa); 550 551 err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); 552 if (err) 553 return (err); 554 555 while (ds->ds_phys->ds_prev_snap_obj != 0) { 556 err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, 557 FTAG, &prev); 558 if (err) { 559 dsl_dataset_rele(ds, FTAG); 560 return (err); 561 } 562 563 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) 564 break; 565 dsl_dataset_rele(ds, FTAG); 566 ds = prev; 567 prev = NULL; 568 } 569 570 if (prev == NULL) { 571 prev = dp->dp_origin_snap; 572 573 /* 574 * The $ORIGIN can't have any data, or the accounting 575 * will be wrong. 576 */ 577 ASSERT(prev->ds_phys->ds_bp.blk_birth == 0); 578 579 /* The origin doesn't get attached to itself */ 580 if (ds->ds_object == prev->ds_object) { 581 dsl_dataset_rele(ds, FTAG); 582 return (0); 583 } 584 585 dmu_buf_will_dirty(ds->ds_dbuf, tx); 586 ds->ds_phys->ds_prev_snap_obj = prev->ds_object; 587 ds->ds_phys->ds_prev_snap_txg = prev->ds_phys->ds_creation_txg; 588 589 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 590 ds->ds_dir->dd_phys->dd_origin_obj = prev->ds_object; 591 592 dmu_buf_will_dirty(prev->ds_dbuf, tx); 593 prev->ds_phys->ds_num_children++; 594 595 if (ds->ds_phys->ds_next_snap_obj == 0) { 596 ASSERT(ds->ds_prev == NULL); 597 VERIFY(0 == dsl_dataset_hold_obj(dp, 598 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 599 } 600 } 601 602 ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object); 603 ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object); 604 605 if (prev->ds_phys->ds_next_clones_obj == 0) { 606 dmu_buf_will_dirty(prev->ds_dbuf, tx); 607 prev->ds_phys->ds_next_clones_obj = 608 zap_create(dp->dp_meta_objset, 609 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 610 } 611 VERIFY(0 == zap_add_int(dp->dp_meta_objset, 612 prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); 613 614 dsl_dataset_rele(ds, FTAG); 615 if (prev != dp->dp_origin_snap) 616 dsl_dataset_rele(prev, FTAG); 617 return (0); 618} 619 620void 621dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) 622{ 623 ASSERT(dmu_tx_is_syncing(tx)); 624 ASSERT(dp->dp_origin_snap != NULL); 625 626 VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb, 627 tx, DS_FIND_CHILDREN)); 628} 629 630void 631dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) 632{ 633 uint64_t dsobj; 634 dsl_dataset_t *ds; 635 636 ASSERT(dmu_tx_is_syncing(tx)); 637 ASSERT(dp->dp_origin_snap == NULL); 638 639 /* create the origin dir, ds, & snap-ds */ 640 rw_enter(&dp->dp_config_rwlock, RW_WRITER); 641 dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, 642 NULL, 0, kcred, tx); 643 VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 644 dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, kcred, tx); 645 VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, 646 dp, &dp->dp_origin_snap)); 647 dsl_dataset_rele(ds, FTAG); 648 rw_exit(&dp->dp_config_rwlock); 649} 650 651taskq_t * 652dsl_pool_vnrele_taskq(dsl_pool_t *dp) 653{ 654 return (dp->dp_vnrele_taskq); 655} 656