dmu_send.c revision 265744
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zfs_znode.h>
#include <zfs_fletcher.h>
#include <sys/avl.h>
#include <sys/ddt.h>
#include <sys/zfs_onexit.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>

/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;

static char *dmu_recv_tag = "dmu_recv_tag";
static const char *recv_clone_name = "%recv";

static int
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
{
	dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
	struct uio auio;
	struct iovec aiov;
	ASSERT0(len % 8);

	fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
	aiov.iov_base = buf;
	aiov.iov_len = len;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = len;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_offset = (off_t)-1;
	auio.uio_td = dsp->dsa_td;
#ifdef _KERNEL
	if (dsp->dsa_fp->f_type == DTYPE_VNODE)
		bwillwrite();
	dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0,
	    dsp->dsa_td);
#else
	fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__);
	dsp->dsa_err = EOPNOTSUPP;
#endif
	mutex_enter(&ds->ds_sendstream_lock);
	*dsp->dsa_off += len;
	mutex_exit(&ds->ds_sendstream_lock);

	return (dsp->dsa_err);
}

static int
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
    uint64_t length)
{
	struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);

	/*
	 * When we receive a free record, dbuf_free_range() assumes
	 * that the receiving system doesn't have any dbufs in the range
	 * being freed. This is always true because there is a one-record
	 * constraint: we only send one WRITE record for any given
	 * object+offset.  We know that the one-record constraint is
	 * true because we always send data in increasing order by
	 * object,offset.
	 *
	 * If the increasing-order constraint ever changes, we should find
	 * another way to assert that the one-record constraint is still
	 * satisfied.
	 */
	ASSERT(object > dsp->dsa_last_data_object ||
	    (object == dsp->dsa_last_data_object &&
	    offset > dsp->dsa_last_data_offset));

	/*
	 * If we are doing a non-incremental send, then there can't
	 * be any data in the dataset we're receiving into.  Therefore
	 * a free record would simply be a no-op.  Save space by not
	 * sending it to begin with.
	 */
	if (!dsp->dsa_incremental)
		return (0);

	if (length != -1ULL && offset + length < offset)
		length = -1ULL;

	/*
	 * If there is a pending op, but it's not PENDING_FREE, push it out,
	 * since free block aggregation can only be done for blocks of the
	 * same type (i.e., DRR_FREE records can only be aggregated with
	 * other DRR_FREE records.  DRR_FREEOBJECTS records can only be
	 * aggregated with other DRR_FREEOBJECTS records.
	 */
	if (dsp->dsa_pending_op != PENDING_NONE &&
	    dsp->dsa_pending_op != PENDING_FREE) {
		if (dump_bytes(dsp, dsp->dsa_drr,
		    sizeof (dmu_replay_record_t)) != 0)
			return (SET_ERROR(EINTR));
		dsp->dsa_pending_op = PENDING_NONE;
	}

	if (dsp->dsa_pending_op == PENDING_FREE) {
		/*
		 * There should never be a PENDING_FREE if length is -1
		 * (because dump_dnode is the only place where this
		 * function is called with a -1, and only after flushing
		 * any pending record).
		 */
		ASSERT(length != -1ULL);
		/*
		 * Check to see whether this free block can be aggregated
		 * with pending one.
		 */
		if (drrf->drr_object == object && drrf->drr_offset +
		    drrf->drr_length == offset) {
			drrf->drr_length += length;
			return (0);
		} else {
			/* not a continuation.  Push out pending record */
			if (dump_bytes(dsp, dsp->dsa_drr,
			    sizeof (dmu_replay_record_t)) != 0)
				return (SET_ERROR(EINTR));
			dsp->dsa_pending_op = PENDING_NONE;
		}
	}
	/* create a FREE record and make it pending */
	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
	dsp->dsa_drr->drr_type = DRR_FREE;
	drrf->drr_object = object;
	drrf->drr_offset = offset;
	drrf->drr_length = length;
	drrf->drr_toguid = dsp->dsa_toguid;
	if (length == -1ULL) {
		if (dump_bytes(dsp, dsp->dsa_drr,
		    sizeof (dmu_replay_record_t)) != 0)
			return (SET_ERROR(EINTR));
	} else {
		dsp->dsa_pending_op = PENDING_FREE;
	}

	return (0);
}

static int
dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
    uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
{
	struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);

	/*
	 * We send data in increasing object, offset order.
	 * See comment in dump_free() for details.
	 */
	ASSERT(object > dsp->dsa_last_data_object ||
	    (object == dsp->dsa_last_data_object &&
	    offset > dsp->dsa_last_data_offset));
	dsp->dsa_last_data_object = object;
	dsp->dsa_last_data_offset = offset + blksz - 1;

	/*
	 * If there is any kind of pending aggregation (currently either
	 * a grouping of free objects or free blocks), push it out to
	 * the stream, since aggregation can't be done across operations
	 * of different types.
	 */
	if (dsp->dsa_pending_op != PENDING_NONE) {
		if (dump_bytes(dsp, dsp->dsa_drr,
		    sizeof (dmu_replay_record_t)) != 0)
			return (SET_ERROR(EINTR));
		dsp->dsa_pending_op = PENDING_NONE;
	}
	/* write a DATA record */
	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
	dsp->dsa_drr->drr_type = DRR_WRITE;
	drrw->drr_object = object;
	drrw->drr_type = type;
	drrw->drr_offset = offset;
	drrw->drr_length = blksz;
	drrw->drr_toguid = dsp->dsa_toguid;
	drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
	if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
		drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
	DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
	DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
	DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
	drrw->drr_key.ddk_cksum = bp->blk_cksum;

	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
		return (SET_ERROR(EINTR));
	if (dump_bytes(dsp, data, blksz) != 0)
		return (SET_ERROR(EINTR));
	return (0);
}

static int
dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
{
	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);

	if (dsp->dsa_pending_op != PENDING_NONE) {
		if (dump_bytes(dsp, dsp->dsa_drr,
		    sizeof (dmu_replay_record_t)) != 0)
			return (SET_ERROR(EINTR));
		dsp->dsa_pending_op = PENDING_NONE;
	}

	/* write a SPILL record */
	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
	dsp->dsa_drr->drr_type = DRR_SPILL;
	drrs->drr_object = object;
	drrs->drr_length = blksz;
	drrs->drr_toguid = dsp->dsa_toguid;

	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
		return (SET_ERROR(EINTR));
	if (dump_bytes(dsp, data, blksz))
		return (SET_ERROR(EINTR));
	return (0);
}

static int
dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
{
	struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);

	/* See comment in dump_free(). */
	if (!dsp->dsa_incremental)
		return (0);

	/*
	 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
	 * push it out, since free block aggregation can only be done for
	 * blocks of the same type (i.e., DRR_FREE records can only be
	 * aggregated with other DRR_FREE records.  DRR_FREEOBJECTS records
	 * can only be aggregated with other DRR_FREEOBJECTS records.
	 */
	if (dsp->dsa_pending_op != PENDING_NONE &&
	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
		if (dump_bytes(dsp, dsp->dsa_drr,
		    sizeof (dmu_replay_record_t)) != 0)
			return (SET_ERROR(EINTR));
		dsp->dsa_pending_op = PENDING_NONE;
	}
	if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
		/*
		 * See whether this free object array can be aggregated
		 * with pending one
		 */
		if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
			drrfo->drr_numobjs += numobjs;
			return (0);
		} else {
			/* can't be aggregated.  Push out pending record */
			if (dump_bytes(dsp, dsp->dsa_drr,
			    sizeof (dmu_replay_record_t)) != 0)
				return (SET_ERROR(EINTR));
			dsp->dsa_pending_op = PENDING_NONE;
		}
	}

	/* write a FREEOBJECTS record */
	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
	dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
	drrfo->drr_firstobj = firstobj;
	drrfo->drr_numobjs = numobjs;
	drrfo->drr_toguid = dsp->dsa_toguid;

	dsp->dsa_pending_op = PENDING_FREEOBJECTS;

	return (0);
}

static int
dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
{
	struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);

	if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
		return (dump_freeobjects(dsp, object, 1));

	if (dsp->dsa_pending_op != PENDING_NONE) {
		if (dump_bytes(dsp, dsp->dsa_drr,
		    sizeof (dmu_replay_record_t)) != 0)
			return (SET_ERROR(EINTR));
		dsp->dsa_pending_op = PENDING_NONE;
	}

	/* write an OBJECT record */
	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
	dsp->dsa_drr->drr_type = DRR_OBJECT;
	drro->drr_object = object;
	drro->drr_type = dnp->dn_type;
	drro->drr_bonustype = dnp->dn_bonustype;
	drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
	drro->drr_bonuslen = dnp->dn_bonuslen;
	drro->drr_checksumtype = dnp->dn_checksum;
	drro->drr_compress = dnp->dn_compress;
	drro->drr_toguid = dsp->dsa_toguid;

	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
		return (SET_ERROR(EINTR));

	if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
		return (SET_ERROR(EINTR));

	/* Free anything past the end of the file. */
	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0)
		return (SET_ERROR(EINTR));
	if (dsp->dsa_err != 0)
		return (SET_ERROR(EINTR));
	return (0);
}

#define	BP_SPAN(dnp, level) \
	(((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
	(level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))

/* ARGSUSED */
static int
backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	dmu_sendarg_t *dsp = arg;
	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
	int err = 0;

	if (issig(JUSTLOOKING) && issig(FORREAL))
		return (SET_ERROR(EINTR));

	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
	    DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
		return (0);
	} else if (zb->zb_level == ZB_ZIL_LEVEL) {
		/*
		 * If we are sending a non-snapshot (which is allowed on
		 * read-only pools), it may have a ZIL, which must be ignored.
		 */
		return (0);
	} else if (BP_IS_HOLE(bp) &&
	    zb->zb_object == DMU_META_DNODE_OBJECT) {
		uint64_t span = BP_SPAN(dnp, zb->zb_level);
		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
		err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
	} else if (BP_IS_HOLE(bp)) {
		uint64_t span = BP_SPAN(dnp, zb->zb_level);
		err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
		return (0);
	} else if (type == DMU_OT_DNODE) {
		dnode_phys_t *blk;
		int i;
		int blksz = BP_GET_LSIZE(bp);
		uint32_t aflags = ARC_WAIT;
		arc_buf_t *abuf;

		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
		    &aflags, zb) != 0)
			return (SET_ERROR(EIO));

		blk = abuf->b_data;
		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
			uint64_t dnobj = (zb->zb_blkid <<
			    (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
			err = dump_dnode(dsp, dnobj, blk+i);
			if (err != 0)
				break;
		}
		(void) arc_buf_remove_ref(abuf, &abuf);
	} else if (type == DMU_OT_SA) {
		uint32_t aflags = ARC_WAIT;
		arc_buf_t *abuf;
		int blksz = BP_GET_LSIZE(bp);

		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
		    &aflags, zb) != 0)
			return (SET_ERROR(EIO));

		err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
		(void) arc_buf_remove_ref(abuf, &abuf);
	} else { /* it's a level-0 block of a regular object */
		uint32_t aflags = ARC_WAIT;
		arc_buf_t *abuf;
		int blksz = BP_GET_LSIZE(bp);

		ASSERT0(zb->zb_level);
		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
		    &aflags, zb) != 0) {
			if (zfs_send_corrupt_data) {
				/* Send a block filled with 0x"zfs badd bloc" */
				abuf = arc_buf_alloc(spa, blksz, &abuf,
				    ARC_BUFC_DATA);
				uint64_t *ptr;
				for (ptr = abuf->b_data;
				    (char *)ptr < (char *)abuf->b_data + blksz;
				    ptr++)
					*ptr = 0x2f5baddb10c;
			} else {
				return (SET_ERROR(EIO));
			}
		}

		err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
		    blksz, bp, abuf->b_data);
		(void) arc_buf_remove_ref(abuf, &abuf);
	}

	ASSERT(err == 0 || err == EINTR);
	return (err);
}

/*
 * Releases dp using the specified tag.
 */
static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
#ifdef illumos
    zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd,
    vnode_t *vp, offset_t *off)
#else
    zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd,
    struct file *fp, offset_t *off)
#endif
{
	objset_t *os;
	dmu_replay_record_t *drr;
	dmu_sendarg_t *dsp;
	int err;
	uint64_t fromtxg = 0;

	err = dmu_objset_from_ds(ds, &os);
	if (err != 0) {
		dsl_pool_rele(dp, tag);
		return (err);
	}

	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
	drr->drr_type = DRR_BEGIN;
	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
	DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
	    DMU_SUBSTREAM);

#ifdef _KERNEL
	if (dmu_objset_type(os) == DMU_OST_ZFS) {
		uint64_t version;
		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) {
			kmem_free(drr, sizeof (dmu_replay_record_t));
			dsl_pool_rele(dp, tag);
			return (SET_ERROR(EINVAL));
		}
		if (version >= ZPL_VERSION_SA) {
			DMU_SET_FEATUREFLAGS(
			    drr->drr_u.drr_begin.drr_versioninfo,
			    DMU_BACKUP_FEATURE_SA_SPILL);
		}
	}
#endif

	drr->drr_u.drr_begin.drr_creation_time =
	    ds->ds_phys->ds_creation_time;
	drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
	if (is_clone)
		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
	drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;

	if (fromzb != NULL) {
		drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid;
		fromtxg = fromzb->zbm_creation_txg;
	}
	dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
	if (!dsl_dataset_is_snapshot(ds)) {
		(void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
		    sizeof (drr->drr_u.drr_begin.drr_toname));
	}

	dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);

	dsp->dsa_drr = drr;
	dsp->dsa_outfd = outfd;
	dsp->dsa_proc = curproc;
	dsp->dsa_td = curthread;
	dsp->dsa_fp = fp;
	dsp->dsa_os = os;
	dsp->dsa_off = off;
	dsp->dsa_toguid = ds->ds_phys->ds_guid;
	ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
	dsp->dsa_pending_op = PENDING_NONE;
	dsp->dsa_incremental = (fromzb != NULL);

	mutex_enter(&ds->ds_sendstream_lock);
	list_insert_head(&ds->ds_sendstreams, dsp);
	mutex_exit(&ds->ds_sendstream_lock);

	dsl_dataset_long_hold(ds, FTAG);
	dsl_pool_rele(dp, tag);

	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
		err = dsp->dsa_err;
		goto out;
	}

	err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
	    backup_cb, dsp);

	if (dsp->dsa_pending_op != PENDING_NONE)
		if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
			err = SET_ERROR(EINTR);

	if (err != 0) {
		if (err == EINTR && dsp->dsa_err != 0)
			err = dsp->dsa_err;
		goto out;
	}

	bzero(drr, sizeof (dmu_replay_record_t));
	drr->drr_type = DRR_END;
	drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
	drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;

	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
		err = dsp->dsa_err;
		goto out;
	}

out:
	mutex_enter(&ds->ds_sendstream_lock);
	list_remove(&ds->ds_sendstreams, dsp);
	mutex_exit(&ds->ds_sendstream_lock);

	kmem_free(drr, sizeof (dmu_replay_record_t));
	kmem_free(dsp, sizeof (dmu_sendarg_t));

	dsl_dataset_long_rele(ds, FTAG);

	return (err);
}

int
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
#ifdef illumos
    int outfd, vnode_t *vp, offset_t *off)
#else
    int outfd, struct file *fp, offset_t *off)
#endif
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	dsl_dataset_t *fromds = NULL;
	int err;

	err = dsl_pool_hold(pool, FTAG, &dp);
	if (err != 0)
		return (err);

	err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
	if (err != 0) {
		dsl_pool_rele(dp, FTAG);
		return (err);
	}

	if (fromsnap != 0) {
		zfs_bookmark_phys_t zb;
		boolean_t is_clone;

		err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
		if (err != 0) {
			dsl_dataset_rele(ds, FTAG);
			dsl_pool_rele(dp, FTAG);
			return (err);
		}
		if (!dsl_dataset_is_before(ds, fromds, 0))
			err = SET_ERROR(EXDEV);
		zb.zbm_creation_time = fromds->ds_phys->ds_creation_time;
		zb.zbm_creation_txg = fromds->ds_phys->ds_creation_txg;
		zb.zbm_guid = fromds->ds_phys->ds_guid;
		is_clone = (fromds->ds_dir != ds->ds_dir);
		dsl_dataset_rele(fromds, FTAG);
		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
		    outfd, fp, off);
	} else {
		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
		    outfd, fp, off);
	}
	dsl_dataset_rele(ds, FTAG);
	return (err);
}

int
dmu_send(const char *tosnap, const char *fromsnap,
#ifdef illumos
    int outfd, vnode_t *vp, offset_t *off)
#else
    int outfd, struct file *fp, offset_t *off)
#endif
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int err;
	boolean_t owned = B_FALSE;

	if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
		return (SET_ERROR(EINVAL));

	err = dsl_pool_hold(tosnap, FTAG, &dp);
	if (err != 0)
		return (err);

	if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) {
		/*
		 * We are sending a filesystem or volume.  Ensure
		 * that it doesn't change by owning the dataset.
		 */
		err = dsl_dataset_own(dp, tosnap, FTAG, &ds);
		owned = B_TRUE;
	} else {
		err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
	}
	if (err != 0) {
		dsl_pool_rele(dp, FTAG);
		return (err);
	}

	if (fromsnap != NULL) {
		zfs_bookmark_phys_t zb;
		boolean_t is_clone = B_FALSE;
		int fsnamelen = strchr(tosnap, '@') - tosnap;

		/*
		 * If the fromsnap is in a different filesystem, then
		 * mark the send stream as a clone.
		 */
		if (strncmp(tosnap, fromsnap, fsnamelen) != 0 ||
		    (fromsnap[fsnamelen] != '@' &&
		    fromsnap[fsnamelen] != '#')) {
			is_clone = B_TRUE;
		}

		if (strchr(fromsnap, '@')) {
			dsl_dataset_t *fromds;
			err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds);
			if (err == 0) {
				if (!dsl_dataset_is_before(ds, fromds, 0))
					err = SET_ERROR(EXDEV);
				zb.zbm_creation_time =
				    fromds->ds_phys->ds_creation_time;
				zb.zbm_creation_txg =
				    fromds->ds_phys->ds_creation_txg;
				zb.zbm_guid = fromds->ds_phys->ds_guid;
				is_clone = (ds->ds_dir != fromds->ds_dir);
				dsl_dataset_rele(fromds, FTAG);
			}
		} else {
			err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
		}
		if (err != 0) {
			dsl_dataset_rele(ds, FTAG);
			dsl_pool_rele(dp, FTAG);
			return (err);
		}
		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
		    outfd, fp, off);
	} else {
		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
		    outfd, fp, off);
	}
	if (owned)
		dsl_dataset_disown(ds, FTAG);
	else
		dsl_dataset_rele(ds, FTAG);
	return (err);
}

int
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	int err;
	uint64_t size;

	ASSERT(dsl_pool_config_held(dp));

	/* tosnap must be a snapshot */
	if (!dsl_dataset_is_snapshot(ds))
		return (SET_ERROR(EINVAL));

	/*
	 * fromsnap must be an earlier snapshot from the same fs as tosnap,
	 * or the origin's fs.
	 */
	if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0))
		return (SET_ERROR(EXDEV));

	/* Get uncompressed size estimate of changed data. */
	if (fromds == NULL) {
		size = ds->ds_phys->ds_uncompressed_bytes;
	} else {
		uint64_t used, comp;
		err = dsl_dataset_space_written(fromds, ds,
		    &used, &comp, &size);
		if (err != 0)
			return (err);
	}

	/*
	 * Assume that space (both on-disk and in-stream) is dominated by
	 * data.  We will adjust for indirect blocks and the copies property,
	 * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
	 */

	/*
	 * Subtract out approximate space used by indirect blocks.
	 * Assume most space is used by data blocks (non-indirect, non-dnode).
	 * Assume all blocks are recordsize.  Assume ditto blocks and
	 * internal fragmentation counter out compression.
	 *
	 * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
	 * block, which we observe in practice.
	 */
	uint64_t recordsize;
	err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
	if (err != 0)
		return (err);
	size -= size / recordsize * sizeof (blkptr_t);

	/* Add in the space for the record associated with each block. */
	size += size / recordsize * sizeof (dmu_replay_record_t);

	*sizep = size;

	return (0);
}

typedef struct dmu_recv_begin_arg {
	const char *drba_origin;
	dmu_recv_cookie_t *drba_cookie;
	cred_t *drba_cred;
	uint64_t drba_snapobj;
} dmu_recv_begin_arg_t;

static int
recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
    uint64_t fromguid)
{
	uint64_t val;
	int error;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* temporary clone name must not exist */
	error = zap_lookup(dp->dp_meta_objset,
	    ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name,
	    8, 1, &val);
	if (error != ENOENT)
		return (error == 0 ? EBUSY : error);

	/* new snapshot name must not exist */
	error = zap_lookup(dp->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap,
	    8, 1, &val);
	if (error != ENOENT)
		return (error == 0 ? EEXIST : error);

	/*
	 * Check snapshot limit before receiving. We'll recheck again at the
	 * end, but might as well abort before receiving if we're already over
	 * the limit.
	 *
	 * Note that we do not check the file system limit with
	 * dsl_dir_fscount_check because the temporary %clones don't count
	 * against that limit.
	 */
	error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT,
	    NULL, drba->drba_cred);
	if (error != 0)
		return (error);

	if (fromguid != 0) {
		dsl_dataset_t *snap;
		uint64_t obj = ds->ds_phys->ds_prev_snap_obj;

		/* Find snapshot in this dir that matches fromguid. */
		while (obj != 0) {
			error = dsl_dataset_hold_obj(dp, obj, FTAG,
			    &snap);
			if (error != 0)
				return (SET_ERROR(ENODEV));
			if (snap->ds_dir != ds->ds_dir) {
				dsl_dataset_rele(snap, FTAG);
				return (SET_ERROR(ENODEV));
			}
			if (snap->ds_phys->ds_guid == fromguid)
				break;
			obj = snap->ds_phys->ds_prev_snap_obj;
			dsl_dataset_rele(snap, FTAG);
		}
		if (obj == 0)
			return (SET_ERROR(ENODEV));

		if (drba->drba_cookie->drc_force) {
			drba->drba_snapobj = obj;
		} else {
			/*
			 * If we are not forcing, there must be no
			 * changes since fromsnap.
			 */
			if (dsl_dataset_modified_since_snap(ds, snap)) {
				dsl_dataset_rele(snap, FTAG);
				return (SET_ERROR(ETXTBSY));
			}
			drba->drba_snapobj = ds->ds_prev->ds_object;
		}

		dsl_dataset_rele(snap, FTAG);
	} else {
		/* if full, most recent snapshot must be $ORIGIN */
		if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL)
			return (SET_ERROR(ENODEV));
		drba->drba_snapobj = ds->ds_phys->ds_prev_snap_obj;
	}

	return (0);

}

static int
dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
{
	dmu_recv_begin_arg_t *drba = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
	uint64_t fromguid = drrb->drr_fromguid;
	int flags = drrb->drr_flags;
	int error;
	dsl_dataset_t *ds;
	const char *tofs = drba->drba_cookie->drc_tofs;

	/* already checked */
	ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);

	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
	    DMU_COMPOUNDSTREAM ||
	    drrb->drr_type >= DMU_OST_NUMTYPES ||
	    ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL))
		return (SET_ERROR(EINVAL));

	/* Verify pool version supports SA if SA_SPILL feature set */
	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
	    DMU_BACKUP_FEATURE_SA_SPILL) &&
	    spa_version(dp->dp_spa) < SPA_VERSION_SA) {
		return (SET_ERROR(ENOTSUP));
	}

	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
	if (error == 0) {
		/* target fs already exists; recv into temp clone */

		/* Can't recv a clone into an existing fs */
		if (flags & DRR_FLAG_CLONE) {
			dsl_dataset_rele(ds, FTAG);
			return (SET_ERROR(EINVAL));
		}

		error = recv_begin_check_existing_impl(drba, ds, fromguid);
		dsl_dataset_rele(ds, FTAG);
	} else if (error == ENOENT) {
		/* target fs does not exist; must be a full backup or clone */
		char buf[MAXNAMELEN];

		/*
		 * If it's a non-clone incremental, we are missing the
		 * target fs, so fail the recv.
		 */
		if (fromguid != 0 && !(flags & DRR_FLAG_CLONE))
			return (SET_ERROR(ENOENT));

		/* Open the parent of tofs */
		ASSERT3U(strlen(tofs), <, MAXNAMELEN);
		(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
		error = dsl_dataset_hold(dp, buf, FTAG, &ds);
		if (error != 0)
			return (error);

		/*
		 * Check filesystem and snapshot limits before receiving. We'll
		 * recheck snapshot limits again at the end (we create the
		 * filesystems and increment those counts during begin_sync).
		 */
		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
		    ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred);
		if (error != 0) {
			dsl_dataset_rele(ds, FTAG);
			return (error);
		}

		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred);
		if (error != 0) {
			dsl_dataset_rele(ds, FTAG);
			return (error);
		}

		if (drba->drba_origin != NULL) {
			dsl_dataset_t *origin;
			error = dsl_dataset_hold(dp, drba->drba_origin,
			    FTAG, &origin);
			if (error != 0) {
				dsl_dataset_rele(ds, FTAG);
				return (error);
			}
			if (!dsl_dataset_is_snapshot(origin)) {
				dsl_dataset_rele(origin, FTAG);
				dsl_dataset_rele(ds, FTAG);
				return (SET_ERROR(EINVAL));
			}
			if (origin->ds_phys->ds_guid != fromguid) {
				dsl_dataset_rele(origin, FTAG);
				dsl_dataset_rele(ds, FTAG);
				return (SET_ERROR(ENODEV));
			}
			dsl_dataset_rele(origin, FTAG);
		}
		dsl_dataset_rele(ds, FTAG);
		error = 0;
	}
	return (error);
}

static void
dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
{
	dmu_recv_begin_arg_t *drba = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
	const char *tofs = drba->drba_cookie->drc_tofs;
	dsl_dataset_t *ds, *newds;
	uint64_t dsobj;
	int error;
	uint64_t crflags;

	crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
	    DS_FLAG_CI_DATASET : 0;

	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
	if (error == 0) {
		/* create temporary clone */
		dsl_dataset_t *snap = NULL;
		if (drba->drba_snapobj != 0) {
			VERIFY0(dsl_dataset_hold_obj(dp,
			    drba->drba_snapobj, FTAG, &snap));
		}
		dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
		    snap, crflags, drba->drba_cred, tx);
		dsl_dataset_rele(snap, FTAG);
		dsl_dataset_rele(ds, FTAG);
	} else {
		dsl_dir_t *dd;
		const char *tail;
		dsl_dataset_t *origin = NULL;

		VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail));

		if (drba->drba_origin != NULL) {
			VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
			    FTAG, &origin));
		}

		/* Create new dataset. */
		dsobj = dsl_dataset_create_sync(dd,
		    strrchr(tofs, '/') + 1,
		    origin, crflags, drba->drba_cred, tx);
		if (origin != NULL)
			dsl_dataset_rele(origin, FTAG);
		dsl_dir_rele(dd, FTAG);
		drba->drba_cookie->drc_newfs = B_TRUE;
	}
	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));

	dmu_buf_will_dirty(newds->ds_dbuf, tx);
	newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	/*
	 * If we actually created a non-clone, we need to create the
	 * objset in our new dataset.
	 */
	if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
		(void) dmu_objset_create_impl(dp->dp_spa,
		    newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
	}

	drba->drba_cookie->drc_ds = newds;

	spa_history_log_internal_ds(newds, "receive", tx, "");
}

/*
 * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
 * succeeds; otherwise we will leak the holds on the datasets.
 */
int
dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
    boolean_t force, char *origin, dmu_recv_cookie_t *drc)
{
	dmu_recv_begin_arg_t drba = { 0 };
	dmu_replay_record_t *drr;

	bzero(drc, sizeof (dmu_recv_cookie_t));
	drc->drc_drrb = drrb;
	drc->drc_tosnap = tosnap;
	drc->drc_tofs = tofs;
	drc->drc_force = force;
	drc->drc_cred = CRED();

	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
		drc->drc_byteswap = B_TRUE;
	else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
		return (SET_ERROR(EINVAL));

	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
	drr->drr_type = DRR_BEGIN;
	drr->drr_u.drr_begin = *drc->drc_drrb;
	if (drc->drc_byteswap) {
		fletcher_4_incremental_byteswap(drr,
		    sizeof (dmu_replay_record_t), &drc->drc_cksum);
	} else {
		fletcher_4_incremental_native(drr,
		    sizeof (dmu_replay_record_t), &drc->drc_cksum);
	}
	kmem_free(drr, sizeof (dmu_replay_record_t));

	if (drc->drc_byteswap) {
		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
		drrb->drr_type = BSWAP_32(drrb->drr_type);
		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
	}

	drba.drba_origin = origin;
	drba.drba_cookie = drc;
	drba.drba_cred = CRED();

	return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
	    &drba, 5));
}

struct restorearg {
	int err;
	boolean_t byteswap;
	kthread_t *td;
	struct file *fp;
	char *buf;
	uint64_t voff;
	int bufsize; /* amount of memory allocated for buf */
	zio_cksum_t cksum;
	avl_tree_t *guid_to_ds_map;
};

typedef struct guid_map_entry {
	uint64_t guid;
	dsl_dataset_t *gme_ds;
	avl_node_t avlnode;
} guid_map_entry_t;

static int
guid_compare(const void *arg1, const void *arg2)
{
	const guid_map_entry_t *gmep1 = arg1;
	const guid_map_entry_t *gmep2 = arg2;

	if (gmep1->guid < gmep2->guid)
		return (-1);
	else if (gmep1->guid > gmep2->guid)
		return (1);
	return (0);
}

static void
free_guid_map_onexit(void *arg)
{
	avl_tree_t *ca = arg;
	void *cookie = NULL;
	guid_map_entry_t *gmep;

	while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
		dsl_dataset_long_rele(gmep->gme_ds, gmep);
		dsl_dataset_rele(gmep->gme_ds, gmep);
		kmem_free(gmep, sizeof (guid_map_entry_t));
	}
	avl_destroy(ca);
	kmem_free(ca, sizeof (avl_tree_t));
}

static int
restore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_base = buf;
	aiov.iov_len = len;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = len;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = off;
	auio.uio_td = ra->td;
#ifdef _KERNEL
	error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td);
#else
	fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__);
	error = EOPNOTSUPP;
#endif
	*resid = auio.uio_resid;
	return (error);
}

static void *
restore_read(struct restorearg *ra, int len)
{
	void *rv;
	int done = 0;

	/* some things will require 8-byte alignment, so everything must */
	ASSERT0(len % 8);

	while (done < len) {
		ssize_t resid;

		ra->err = restore_bytes(ra, (caddr_t)ra->buf + done,
		    len - done, ra->voff, &resid);

		if (resid == len - done)
			ra->err = SET_ERROR(EINVAL);
		ra->voff += len - done - resid;
		done = len - resid;
		if (ra->err != 0)
			return (NULL);
	}

	ASSERT3U(done, ==, len);
	rv = ra->buf;
	if (ra->byteswap)
		fletcher_4_incremental_byteswap(rv, len, &ra->cksum);
	else
		fletcher_4_incremental_native(rv, len, &ra->cksum);
	return (rv);
}

static void
backup_byteswap(dmu_replay_record_t *drr)
{
#define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
#define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
	drr->drr_type = BSWAP_32(drr->drr_type);
	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
	switch (drr->drr_type) {
	case DRR_BEGIN:
		DO64(drr_begin.drr_magic);
		DO64(drr_begin.drr_versioninfo);
		DO64(drr_begin.drr_creation_time);
		DO32(drr_begin.drr_type);
		DO32(drr_begin.drr_flags);
		DO64(drr_begin.drr_toguid);
		DO64(drr_begin.drr_fromguid);
		break;
	case DRR_OBJECT:
		DO64(drr_object.drr_object);
		/* DO64(drr_object.drr_allocation_txg); */
		DO32(drr_object.drr_type);
		DO32(drr_object.drr_bonustype);
		DO32(drr_object.drr_blksz);
		DO32(drr_object.drr_bonuslen);
		DO64(drr_object.drr_toguid);
		break;
	case DRR_FREEOBJECTS:
		DO64(drr_freeobjects.drr_firstobj);
		DO64(drr_freeobjects.drr_numobjs);
		DO64(drr_freeobjects.drr_toguid);
		break;
	case DRR_WRITE:
		DO64(drr_write.drr_object);
		DO32(drr_write.drr_type);
		DO64(drr_write.drr_offset);
		DO64(drr_write.drr_length);
		DO64(drr_write.drr_toguid);
		DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
		DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
		DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
		DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
		DO64(drr_write.drr_key.ddk_prop);
		break;
	case DRR_WRITE_BYREF:
		DO64(drr_write_byref.drr_object);
		DO64(drr_write_byref.drr_offset);
		DO64(drr_write_byref.drr_length);
		DO64(drr_write_byref.drr_toguid);
		DO64(drr_write_byref.drr_refguid);
		DO64(drr_write_byref.drr_refobject);
		DO64(drr_write_byref.drr_refoffset);
		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
		DO64(drr_write_byref.drr_key.ddk_prop);
		break;
	case DRR_FREE:
		DO64(drr_free.drr_object);
		DO64(drr_free.drr_offset);
		DO64(drr_free.drr_length);
		DO64(drr_free.drr_toguid);
		break;
	case DRR_SPILL:
		DO64(drr_spill.drr_object);
		DO64(drr_spill.drr_length);
		DO64(drr_spill.drr_toguid);
		break;
	case DRR_END:
		DO64(drr_end.drr_checksum.zc_word[0]);
		DO64(drr_end.drr_checksum.zc_word[1]);
		DO64(drr_end.drr_checksum.zc_word[2]);
		DO64(drr_end.drr_checksum.zc_word[3]);
		DO64(drr_end.drr_toguid);
		break;
	}
#undef DO64
#undef DO32
}

static int
restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
{
	int err;
	dmu_tx_t *tx;
	void *data = NULL;

	if (drro->drr_type == DMU_OT_NONE ||
	    !DMU_OT_IS_VALID(drro->drr_type) ||
	    !DMU_OT_IS_VALID(drro->drr_bonustype) ||
	    drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
	    drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
	    P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
	    drro->drr_blksz < SPA_MINBLOCKSIZE ||
	    drro->drr_blksz > SPA_MAXBLOCKSIZE ||
	    drro->drr_bonuslen > DN_MAX_BONUSLEN) {
		return (SET_ERROR(EINVAL));
	}

	err = dmu_object_info(os, drro->drr_object, NULL);

	if (err != 0 && err != ENOENT)
		return (SET_ERROR(EINVAL));

	if (drro->drr_bonuslen) {
		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
		if (ra->err != 0)
			return (ra->err);
	}

	if (err == ENOENT) {
		/* currently free, want to be allocated */
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err != 0) {
			dmu_tx_abort(tx);
			return (err);
		}
		err = dmu_object_claim(os, drro->drr_object,
		    drro->drr_type, drro->drr_blksz,
		    drro->drr_bonustype, drro->drr_bonuslen, tx);
		dmu_tx_commit(tx);
	} else {
		/* currently allocated, want to be allocated */
		err = dmu_object_reclaim(os, drro->drr_object,
		    drro->drr_type, drro->drr_blksz,
		    drro->drr_bonustype, drro->drr_bonuslen);
	}
	if (err != 0) {
		return (SET_ERROR(EINVAL));
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, drro->drr_object);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		return (err);
	}

	dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
	    tx);
	dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);

	if (data != NULL) {
		dmu_buf_t *db;

		VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
		dmu_buf_will_dirty(db, tx);

		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
		bcopy(data, db->db_data, drro->drr_bonuslen);
		if (ra->byteswap) {
			dmu_object_byteswap_t byteswap =
			    DMU_OT_BYTESWAP(drro->drr_bonustype);
			dmu_ot_byteswap[byteswap].ob_func(db->db_data,
			    drro->drr_bonuslen);
		}
		dmu_buf_rele(db, FTAG);
	}
	dmu_tx_commit(tx);
	return (0);
}

/* ARGSUSED */
static int
restore_freeobjects(struct restorearg *ra, objset_t *os,
    struct drr_freeobjects *drrfo)
{
	uint64_t obj;

	if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
		return (SET_ERROR(EINVAL));

	for (obj = drrfo->drr_firstobj;
	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
	    (void) dmu_object_next(os, &obj, FALSE, 0)) {
		int err;

		if (dmu_object_info(os, obj, NULL) != 0)
			continue;

		err = dmu_free_long_object(os, obj);
		if (err != 0)
			return (err);
	}
	return (0);
}

static int
restore_write(struct restorearg *ra, objset_t *os,
    struct drr_write *drrw)
{
	dmu_tx_t *tx;
	void *data;
	int err;

	if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
	    !DMU_OT_IS_VALID(drrw->drr_type))
		return (SET_ERROR(EINVAL));

	data = restore_read(ra, drrw->drr_length);
	if (data == NULL)
		return (ra->err);

	if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
		return (SET_ERROR(EINVAL));

	tx = dmu_tx_create(os);

	dmu_tx_hold_write(tx, drrw->drr_object,
	    drrw->drr_offset, drrw->drr_length);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		return (err);
	}
	if (ra->byteswap) {
		dmu_object_byteswap_t byteswap =
		    DMU_OT_BYTESWAP(drrw->drr_type);
		dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
	}
	dmu_write(os, drrw->drr_object,
	    drrw->drr_offset, drrw->drr_length, data, tx);
	dmu_tx_commit(tx);
	return (0);
}

/*
 * Handle a DRR_WRITE_BYREF record.  This record is used in dedup'ed
 * streams to refer to a copy of the data that is already on the
 * system because it came in earlier in the stream.  This function
 * finds the earlier copy of the data, and uses that copy instead of
 * data from the stream to fulfill this write.
 */
static int
restore_write_byref(struct restorearg *ra, objset_t *os,
    struct drr_write_byref *drrwbr)
{
	dmu_tx_t *tx;
	int err;
	guid_map_entry_t gmesrch;
	guid_map_entry_t *gmep;
	avl_index_t where;
	objset_t *ref_os = NULL;
	dmu_buf_t *dbp;

	if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
		return (SET_ERROR(EINVAL));

	/*
	 * If the GUID of the referenced dataset is different from the
	 * GUID of the target dataset, find the referenced dataset.
	 */
	if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
		gmesrch.guid = drrwbr->drr_refguid;
		if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
		    &where)) == NULL) {
			return (SET_ERROR(EINVAL));
		}
		if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
			return (SET_ERROR(EINVAL));
	} else {
		ref_os = os;
	}

	if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
	    drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH))
		return (err);

	tx = dmu_tx_create(os);

	dmu_tx_hold_write(tx, drrwbr->drr_object,
	    drrwbr->drr_offset, drrwbr->drr_length);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		return (err);
	}
	dmu_write(os, drrwbr->drr_object,
	    drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
	dmu_buf_rele(dbp, FTAG);
	dmu_tx_commit(tx);
	return (0);
}

static int
restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
{
	dmu_tx_t *tx;
	void *data;
	dmu_buf_t *db, *db_spill;
	int err;

	if (drrs->drr_length < SPA_MINBLOCKSIZE ||
	    drrs->drr_length > SPA_MAXBLOCKSIZE)
		return (SET_ERROR(EINVAL));

	data = restore_read(ra, drrs->drr_length);
	if (data == NULL)
		return (ra->err);

	if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
		return (SET_ERROR(EINVAL));

	VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
	if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
		dmu_buf_rele(db, FTAG);
		return (err);
	}

	tx = dmu_tx_create(os);

	dmu_tx_hold_spill(tx, db->db_object);

	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_buf_rele(db, FTAG);
		dmu_buf_rele(db_spill, FTAG);
		dmu_tx_abort(tx);
		return (err);
	}
	dmu_buf_will_dirty(db_spill, tx);

	if (db_spill->db_size < drrs->drr_length)
		VERIFY(0 == dbuf_spill_set_blksz(db_spill,
		    drrs->drr_length, tx));
	bcopy(data, db_spill->db_data, drrs->drr_length);

	dmu_buf_rele(db, FTAG);
	dmu_buf_rele(db_spill, FTAG);

	dmu_tx_commit(tx);
	return (0);
}

/* ARGSUSED */
static int
restore_free(struct restorearg *ra, objset_t *os,
    struct drr_free *drrf)
{
	int err;

	if (drrf->drr_length != -1ULL &&
	    drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
		return (SET_ERROR(EINVAL));

	if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
		return (SET_ERROR(EINVAL));

	err = dmu_free_long_range(os, drrf->drr_object,
	    drrf->drr_offset, drrf->drr_length);
	return (err);
}

/* used to destroy the drc_ds on error */
static void
dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
{
	char name[MAXNAMELEN];
	dsl_dataset_name(drc->drc_ds, name);
	dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
	(void) dsl_destroy_head(name);
}

/*
 * NB: callers *must* call dmu_recv_end() if this succeeds.
 */
int
dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
    int cleanup_fd, uint64_t *action_handlep)
{
	struct restorearg ra = { 0 };
	dmu_replay_record_t *drr;
	objset_t *os;
	zio_cksum_t pcksum;
	int featureflags;

	ra.byteswap = drc->drc_byteswap;
	ra.cksum = drc->drc_cksum;
	ra.td = curthread;
	ra.fp = fp;
	ra.voff = *voffp;
	ra.bufsize = 1<<20;
	ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);

	/* these were verified in dmu_recv_begin */
	ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
	    DMU_SUBSTREAM);
	ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES);

	/*
	 * Open the objset we are modifying.
	 */
	VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));

	ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);

	featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);

	/* if this stream is dedup'ed, set up the avl tree for guid mapping */
	if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
		minor_t minor;

		if (cleanup_fd == -1) {
			ra.err = SET_ERROR(EBADF);
			goto out;
		}
		ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
		if (ra.err != 0) {
			cleanup_fd = -1;
			goto out;
		}

		if (*action_handlep == 0) {
			ra.guid_to_ds_map =
			    kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
			avl_create(ra.guid_to_ds_map, guid_compare,
			    sizeof (guid_map_entry_t),
			    offsetof(guid_map_entry_t, avlnode));
			ra.err = zfs_onexit_add_cb(minor,
			    free_guid_map_onexit, ra.guid_to_ds_map,
			    action_handlep);
			if (ra.err != 0)
				goto out;
		} else {
			ra.err = zfs_onexit_cb_data(minor, *action_handlep,
			    (void **)&ra.guid_to_ds_map);
			if (ra.err != 0)
				goto out;
		}

		drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
	}

	/*
	 * Read records and process them.
	 */
	pcksum = ra.cksum;
	while (ra.err == 0 &&
	    NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
		if (issig(JUSTLOOKING) && issig(FORREAL)) {
			ra.err = SET_ERROR(EINTR);
			goto out;
		}

		if (ra.byteswap)
			backup_byteswap(drr);

		switch (drr->drr_type) {
		case DRR_OBJECT:
		{
			/*
			 * We need to make a copy of the record header,
			 * because restore_{object,write} may need to
			 * restore_read(), which will invalidate drr.
			 */
			struct drr_object drro = drr->drr_u.drr_object;
			ra.err = restore_object(&ra, os, &drro);
			break;
		}
		case DRR_FREEOBJECTS:
		{
			struct drr_freeobjects drrfo =
			    drr->drr_u.drr_freeobjects;
			ra.err = restore_freeobjects(&ra, os, &drrfo);
			break;
		}
		case DRR_WRITE:
		{
			struct drr_write drrw = drr->drr_u.drr_write;
			ra.err = restore_write(&ra, os, &drrw);
			break;
		}
		case DRR_WRITE_BYREF:
		{
			struct drr_write_byref drrwbr =
			    drr->drr_u.drr_write_byref;
			ra.err = restore_write_byref(&ra, os, &drrwbr);
			break;
		}
		case DRR_FREE:
		{
			struct drr_free drrf = drr->drr_u.drr_free;
			ra.err = restore_free(&ra, os, &drrf);
			break;
		}
		case DRR_END:
		{
			struct drr_end drre = drr->drr_u.drr_end;
			/*
			 * We compare against the *previous* checksum
			 * value, because the stored checksum is of
			 * everything before the DRR_END record.
			 */
			if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
				ra.err = SET_ERROR(ECKSUM);
			goto out;
		}
		case DRR_SPILL:
		{
			struct drr_spill drrs = drr->drr_u.drr_spill;
			ra.err = restore_spill(&ra, os, &drrs);
			break;
		}
		default:
			ra.err = SET_ERROR(EINVAL);
			goto out;
		}
		pcksum = ra.cksum;
	}
	ASSERT(ra.err != 0);

out:
	if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
		zfs_onexit_fd_rele(cleanup_fd);

	if (ra.err != 0) {
		/*
		 * destroy what we created, so we don't leave it in the
		 * inconsistent restoring state.
		 */
		dmu_recv_cleanup_ds(drc);
	}

	kmem_free(ra.buf, ra.bufsize);
	*voffp = ra.voff;
	return (ra.err);
}

static int
dmu_recv_end_check(void *arg, dmu_tx_t *tx)
{
	dmu_recv_cookie_t *drc = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	int error;

	ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag);

	if (!drc->drc_newfs) {
		dsl_dataset_t *origin_head;

		error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head);
		if (error != 0)
			return (error);
		if (drc->drc_force) {
			/*
			 * We will destroy any snapshots in tofs (i.e. before
			 * origin_head) that are after the origin (which is
			 * the snap before drc_ds, because drc_ds can not
			 * have any snaps of its own).
			 */
			uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj;
			while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) {
				dsl_dataset_t *snap;
				error = dsl_dataset_hold_obj(dp, obj, FTAG,
				    &snap);
				if (error != 0)
					return (error);
				if (snap->ds_dir != origin_head->ds_dir)
					error = SET_ERROR(EINVAL);
				if (error == 0) {
					error = dsl_destroy_snapshot_check_impl(
					    snap, B_FALSE);
				}
				obj = snap->ds_phys->ds_prev_snap_obj;
				dsl_dataset_rele(snap, FTAG);
				if (error != 0)
					return (error);
			}
		}
		error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
		    origin_head, drc->drc_force, drc->drc_owner, tx);
		if (error != 0) {
			dsl_dataset_rele(origin_head, FTAG);
			return (error);
		}
		error = dsl_dataset_snapshot_check_impl(origin_head,
		    drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred);
		dsl_dataset_rele(origin_head, FTAG);
		if (error != 0)
			return (error);

		error = dsl_destroy_head_check_impl(drc->drc_ds, 1);
	} else {
		error = dsl_dataset_snapshot_check_impl(drc->drc_ds,
		    drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred);
	}
	return (error);
}

static void
dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
{
	dmu_recv_cookie_t *drc = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);

	spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
	    tx, "snap=%s", drc->drc_tosnap);

	if (!drc->drc_newfs) {
		dsl_dataset_t *origin_head;

		VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG,
		    &origin_head));

		if (drc->drc_force) {
			/*
			 * Destroy any snapshots of drc_tofs (origin_head)
			 * after the origin (the snap before drc_ds).
			 */
			uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj;
			while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) {
				dsl_dataset_t *snap;
				VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG,
				    &snap));
				ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir);
				obj = snap->ds_phys->ds_prev_snap_obj;
				dsl_destroy_snapshot_sync_impl(snap,
				    B_FALSE, tx);
				dsl_dataset_rele(snap, FTAG);
			}
		}
		VERIFY3P(drc->drc_ds->ds_prev, ==,
		    origin_head->ds_prev);

		dsl_dataset_clone_swap_sync_impl(drc->drc_ds,
		    origin_head, tx);
		dsl_dataset_snapshot_sync_impl(origin_head,
		    drc->drc_tosnap, tx);

		/* set snapshot's creation time and guid */
		dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx);
		origin_head->ds_prev->ds_phys->ds_creation_time =
		    drc->drc_drrb->drr_creation_time;
		origin_head->ds_prev->ds_phys->ds_guid =
		    drc->drc_drrb->drr_toguid;
		origin_head->ds_prev->ds_phys->ds_flags &=
		    ~DS_FLAG_INCONSISTENT;

		dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
		origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;

		dsl_dataset_rele(origin_head, FTAG);
		dsl_destroy_head_sync_impl(drc->drc_ds, tx);

		if (drc->drc_owner != NULL)
			VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner);
	} else {
		dsl_dataset_t *ds = drc->drc_ds;

		dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx);

		/* set snapshot's creation time and guid */
		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		ds->ds_prev->ds_phys->ds_creation_time =
		    drc->drc_drrb->drr_creation_time;
		ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid;
		ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;

		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
	}
	drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj;
	/*
	 * Release the hold from dmu_recv_begin.  This must be done before
	 * we return to open context, so that when we free the dataset's dnode,
	 * we can evict its bonus buffer.
	 */
	dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
	drc->drc_ds = NULL;
}

static int
add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
{
	dsl_pool_t *dp;
	dsl_dataset_t *snapds;
	guid_map_entry_t *gmep;
	int err;

	ASSERT(guid_map != NULL);

	err = dsl_pool_hold(name, FTAG, &dp);
	if (err != 0)
		return (err);
	gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
	err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds);
	if (err == 0) {
		gmep->guid = snapds->ds_phys->ds_guid;
		gmep->gme_ds = snapds;
		avl_add(guid_map, gmep);
		dsl_dataset_long_hold(snapds, gmep);
	} else
		kmem_free(gmep, sizeof (*gmep));

	dsl_pool_rele(dp, FTAG);
	return (err);
}

static int dmu_recv_end_modified_blocks = 3;

static int
dmu_recv_existing_end(dmu_recv_cookie_t *drc)
{
	int error;
	char name[MAXNAMELEN];

#ifdef _KERNEL
	/*
	 * We will be destroying the ds; make sure its origin is unmounted if
	 * necessary.
	 */
	dsl_dataset_name(drc->drc_ds, name);
	zfs_destroy_unmount_origin(name);
#endif

	error = dsl_sync_task(drc->drc_tofs,
	    dmu_recv_end_check, dmu_recv_end_sync, drc,
	    dmu_recv_end_modified_blocks);

	if (error != 0)
		dmu_recv_cleanup_ds(drc);
	return (error);
}

static int
dmu_recv_new_end(dmu_recv_cookie_t *drc)
{
	int error;

	error = dsl_sync_task(drc->drc_tofs,
	    dmu_recv_end_check, dmu_recv_end_sync, drc,
	    dmu_recv_end_modified_blocks);

	if (error != 0) {
		dmu_recv_cleanup_ds(drc);
	} else if (drc->drc_guid_to_ds_map != NULL) {
		(void) add_ds_to_guidmap(drc->drc_tofs,
		    drc->drc_guid_to_ds_map,
		    drc->drc_newsnapobj);
	}
	return (error);
}

int
dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
{
	drc->drc_owner = owner;

	if (drc->drc_newfs)
		return (dmu_recv_new_end(drc));
	else
		return (dmu_recv_existing_end(drc));
}

/*
 * Return TRUE if this objset is currently being received into.
 */
boolean_t
dmu_objset_is_receiving(objset_t *os)
{
	return (os->os_dsl_dataset != NULL &&
	    os->os_dsl_dataset->ds_owner == dmu_recv_tag);
}