dmu_send.c revision 263407
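ZFS send/receive implementation for the DMU: serializes a snapshot (full or incremental) into a stream of replay records and applies such streams on receive.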
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zfs_znode.h>
#include <zfs_fletcher.h>
#include <sys/avl.h>
#include <sys/ddt.h>
#include <sys/zfs_onexit.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>

/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;

static char *dmu_recv_tag = "dmu_recv_tag";
static const char *recv_clone_name = "%recv";

static int
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
{
        dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
        struct uio auio;
        struct iovec aiov;
        ASSERT0(len % 8);

        fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
        aiov.iov_base = buf;
        aiov.iov_len = len;
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_resid = len;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_rw = UIO_WRITE;
        auio.uio_offset = (off_t)-1;
        auio.uio_td = dsp->dsa_td;
#ifdef _KERNEL
        if (dsp->dsa_fp->f_type == DTYPE_VNODE)
                bwillwrite();
        dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0,
            dsp->dsa_td);
#else
        fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__);
        dsp->dsa_err = EOPNOTSUPP;
#endif
        mutex_enter(&ds->ds_sendstream_lock);
        *dsp->dsa_off += len;
        mutex_exit(&ds->ds_sendstream_lock);

        return (dsp->dsa_err);
}
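
/*
 * Note on record aggregation: adjacent DRR_FREE and DRR_FREEOBJECTS
 * records are not written to the stream immediately.  The dump_*
 * callbacks below stage one record in dsa_drr as a "pending op"
 * (dsa_pending_op) and keep extending it while consecutive callbacks
 * describe contiguous ranges; the pending record is flushed via
 * dump_bytes() as soon as a record of a different type must be
 * emitted.  dump_bytes() also folds everything it writes into the
 * running fletcher-4 checksum (dsa_zc) that is placed in the final
 * DRR_END record.
 */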

static int
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
    uint64_t length)
{
        struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);

        /*
         * When we receive a free record, dbuf_free_range() assumes
         * that the receiving system doesn't have any dbufs in the range
         * being freed. This is always true because there is a one-record
         * constraint: we only send one WRITE record for any given
         * object+offset. We know that the one-record constraint is
         * true because we always send data in increasing order by
         * object,offset.
         *
         * If the increasing-order constraint ever changes, we should find
         * another way to assert that the one-record constraint is still
         * satisfied.
         */
        ASSERT(object > dsp->dsa_last_data_object ||
            (object == dsp->dsa_last_data_object &&
            offset > dsp->dsa_last_data_offset));

        /*
         * If we are doing a non-incremental send, then there can't
         * be any data in the dataset we're receiving into. Therefore
         * a free record would simply be a no-op. Save space by not
         * sending it to begin with.
         */
        if (!dsp->dsa_incremental)
                return (0);

        if (length != -1ULL && offset + length < offset)
                length = -1ULL;

        /*
         * If there is a pending op, but it's not PENDING_FREE, push it out,
         * since free block aggregation can only be done for blocks of the
         * same type (i.e., DRR_FREE records can only be aggregated with
         * other DRR_FREE records; DRR_FREEOBJECTS records can only be
         * aggregated with other DRR_FREEOBJECTS records).
         */
        if (dsp->dsa_pending_op != PENDING_NONE &&
            dsp->dsa_pending_op != PENDING_FREE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }

        if (dsp->dsa_pending_op == PENDING_FREE) {
                /*
                 * There should never be a PENDING_FREE if length is -1
                 * (because dump_dnode is the only place where this
                 * function is called with a -1, and only after flushing
                 * any pending record).
                 */
                ASSERT(length != -1ULL);
                /*
                 * Check to see whether this free block can be aggregated
                 * with the pending one.
                 */
                if (drrf->drr_object == object && drrf->drr_offset +
                    drrf->drr_length == offset) {
                        drrf->drr_length += length;
                        return (0);
                } else {
                        /* not a continuation.  Push out pending record */
                        if (dump_bytes(dsp, dsp->dsa_drr,
                            sizeof (dmu_replay_record_t)) != 0)
                                return (SET_ERROR(EINTR));
                        dsp->dsa_pending_op = PENDING_NONE;
                }
        }
        /* create a FREE record and make it pending */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_FREE;
        drrf->drr_object = object;
        drrf->drr_offset = offset;
        drrf->drr_length = length;
        drrf->drr_toguid = dsp->dsa_toguid;
        if (length == -1ULL) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
        } else {
                dsp->dsa_pending_op = PENDING_FREE;
        }

        return (0);
}

static int
dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
    uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
{
        struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);

        /*
         * We send data in increasing object, offset order.
         * See comment in dump_free() for details.
         */
        ASSERT(object > dsp->dsa_last_data_object ||
            (object == dsp->dsa_last_data_object &&
            offset > dsp->dsa_last_data_offset));
        dsp->dsa_last_data_object = object;
        dsp->dsa_last_data_offset = offset + blksz - 1;

        /*
         * If there is any kind of pending aggregation (currently either
         * a grouping of free objects or free blocks), push it out to
         * the stream, since aggregation can't be done across operations
         * of different types.
         */
        if (dsp->dsa_pending_op != PENDING_NONE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
        /* write a DATA record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_WRITE;
        drrw->drr_object = object;
        drrw->drr_type = type;
        drrw->drr_offset = offset;
        drrw->drr_length = blksz;
        drrw->drr_toguid = dsp->dsa_toguid;
        drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
        if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
                drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
        DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
        DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
        DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
        drrw->drr_key.ddk_cksum = bp->blk_cksum;

        if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
                return (SET_ERROR(EINTR));
        if (dump_bytes(dsp, data, blksz) != 0)
                return (SET_ERROR(EINTR));
        return (0);
}
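
/*
 * The block-pointer properties that dump_data() copies into drr_key
 * (logical and physical size, compression, and the block checksum,
 * flagged with DRR_CHECKSUM_DEDUP when the checksum algorithm is
 * dedup-capable) give a deduplicating stream processor enough
 * information to replace later writes of identical blocks with
 * DRR_WRITE_BYREF records; see restore_write_byref() below for how
 * such references are resolved on the receiving side.
 */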

static int
dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
{
        struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);

        if (dsp->dsa_pending_op != PENDING_NONE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }

        /* write a SPILL record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_SPILL;
        drrs->drr_object = object;
        drrs->drr_length = blksz;
        drrs->drr_toguid = dsp->dsa_toguid;

        if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
                return (SET_ERROR(EINTR));
        if (dump_bytes(dsp, data, blksz))
                return (SET_ERROR(EINTR));
        return (0);
}

static int
dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
{
        struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);

        /* See comment in dump_free(). */
        if (!dsp->dsa_incremental)
                return (0);

        /*
         * If there is a pending op, but it's not PENDING_FREEOBJECTS,
         * push it out, since free block aggregation can only be done for
         * blocks of the same type (i.e., DRR_FREE records can only be
         * aggregated with other DRR_FREE records; DRR_FREEOBJECTS records
         * can only be aggregated with other DRR_FREEOBJECTS records).
         */
        if (dsp->dsa_pending_op != PENDING_NONE &&
            dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
        if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
                /*
                 * See whether this free object array can be aggregated
                 * with the pending one.
                 */
                if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
                        drrfo->drr_numobjs += numobjs;
                        return (0);
                } else {
                        /* can't be aggregated.  Push out pending record */
                        if (dump_bytes(dsp, dsp->dsa_drr,
                            sizeof (dmu_replay_record_t)) != 0)
                                return (SET_ERROR(EINTR));
                        dsp->dsa_pending_op = PENDING_NONE;
                }
        }

        /* write a FREEOBJECTS record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
        drrfo->drr_firstobj = firstobj;
        drrfo->drr_numobjs = numobjs;
        drrfo->drr_toguid = dsp->dsa_toguid;

        dsp->dsa_pending_op = PENDING_FREEOBJECTS;

        return (0);
}

static int
dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
{
        struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);

        if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
                return (dump_freeobjects(dsp, object, 1));

        if (dsp->dsa_pending_op != PENDING_NONE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }

        /* write an OBJECT record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_OBJECT;
        drro->drr_object = object;
        drro->drr_type = dnp->dn_type;
        drro->drr_bonustype = dnp->dn_bonustype;
        drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
        drro->drr_bonuslen = dnp->dn_bonuslen;
        drro->drr_checksumtype = dnp->dn_checksum;
        drro->drr_compress = dnp->dn_compress;
        drro->drr_toguid = dsp->dsa_toguid;

        if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
                return (SET_ERROR(EINTR));

        if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
                return (SET_ERROR(EINTR));

        /* Free anything past the end of the file. */
        if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
            (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0)
                return (SET_ERROR(EINTR));
        if (dsp->dsa_err != 0)
                return (SET_ERROR(EINTR));
        return (0);
}
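
/*
 * BP_SPAN() computes how many bytes of object data one block pointer at
 * the given indirection level covers.  For example, with 128K data
 * blocks (dn_datablkszsec == 256) and 16K indirect blocks
 * (dn_indblkshift == 14, so each indirect block holds 128 block
 * pointers), a level-1 block pointer spans 128 * 128K = 16M.
 */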
#define	BP_SPAN(dnp, level) \
        (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
        (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))

/* ARGSUSED */
static int
backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
        dmu_sendarg_t *dsp = arg;
        dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
        int err = 0;

        if (issig(JUSTLOOKING) && issig(FORREAL))
                return (SET_ERROR(EINTR));

        if (zb->zb_object != DMU_META_DNODE_OBJECT &&
            DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
                return (0);
        } else if (zb->zb_level == ZB_ZIL_LEVEL) {
                /*
                 * If we are sending a non-snapshot (which is allowed on
                 * read-only pools), it may have a ZIL, which must be ignored.
                 */
                return (0);
        } else if (BP_IS_HOLE(bp) &&
            zb->zb_object == DMU_META_DNODE_OBJECT) {
                uint64_t span = BP_SPAN(dnp, zb->zb_level);
                uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
                err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
        } else if (BP_IS_HOLE(bp)) {
                uint64_t span = BP_SPAN(dnp, zb->zb_level);
                err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
        } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
                return (0);
        } else if (type == DMU_OT_DNODE) {
                dnode_phys_t *blk;
                int i;
                int blksz = BP_GET_LSIZE(bp);
                uint32_t aflags = ARC_WAIT;
                arc_buf_t *abuf;

                if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
                    &aflags, zb) != 0)
                        return (SET_ERROR(EIO));

                blk = abuf->b_data;
                for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
                        uint64_t dnobj = (zb->zb_blkid <<
                            (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
                        err = dump_dnode(dsp, dnobj, blk+i);
                        if (err != 0)
                                break;
                }
                (void) arc_buf_remove_ref(abuf, &abuf);
        } else if (type == DMU_OT_SA) {
                uint32_t aflags = ARC_WAIT;
                arc_buf_t *abuf;
                int blksz = BP_GET_LSIZE(bp);

                if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
                    &aflags, zb) != 0)
                        return (SET_ERROR(EIO));

                err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
                (void) arc_buf_remove_ref(abuf, &abuf);
        } else { /* it's a level-0 block of a regular object */
                uint32_t aflags = ARC_WAIT;
                arc_buf_t *abuf;
                int blksz = BP_GET_LSIZE(bp);

                ASSERT0(zb->zb_level);
                if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
                    &aflags, zb) != 0) {
                        if (zfs_send_corrupt_data) {
                                /* Send a block filled with 0x"zfs badd bloc" */
                                abuf = arc_buf_alloc(spa, blksz, &abuf,
                                    ARC_BUFC_DATA);
                                uint64_t *ptr;
                                for (ptr = abuf->b_data;
                                    (char *)ptr < (char *)abuf->b_data + blksz;
                                    ptr++)
                                        *ptr = 0x2f5baddb10c;
                        } else {
                                return (SET_ERROR(EIO));
                        }
                }

                err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
                    blksz, bp, abuf->b_data);
                (void) arc_buf_remove_ref(abuf, &abuf);
        }

        ASSERT(err == 0 || err == EINTR);
        return (err);
}

/*
 * Releases dp using the specified tag.
459 */ 460static int 461dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, 462#ifdef illumos 463 zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd, 464 vnode_t *vp, offset_t *off) 465#else 466 zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd, 467 struct file *fp, offset_t *off) 468#endif 469{ 470 objset_t *os; 471 dmu_replay_record_t *drr; 472 dmu_sendarg_t *dsp; 473 int err; 474 uint64_t fromtxg = 0; 475 476 err = dmu_objset_from_ds(ds, &os); 477 if (err != 0) { 478 dsl_pool_rele(dp, tag); 479 return (err); 480 } 481 482 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 483 drr->drr_type = DRR_BEGIN; 484 drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 485 DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 486 DMU_SUBSTREAM); 487 488#ifdef _KERNEL 489 if (dmu_objset_type(os) == DMU_OST_ZFS) { 490 uint64_t version; 491 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { 492 kmem_free(drr, sizeof (dmu_replay_record_t)); 493 dsl_pool_rele(dp, tag); 494 return (SET_ERROR(EINVAL)); 495 } 496 if (version >= ZPL_VERSION_SA) { 497 DMU_SET_FEATUREFLAGS( 498 drr->drr_u.drr_begin.drr_versioninfo, 499 DMU_BACKUP_FEATURE_SA_SPILL); 500 } 501 } 502#endif 503 504 drr->drr_u.drr_begin.drr_creation_time = 505 ds->ds_phys->ds_creation_time; 506 drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); 507 if (is_clone) 508 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 509 drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 510 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 511 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 512 513 if (fromzb != NULL) { 514 drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid; 515 fromtxg = fromzb->zbm_creation_txg; 516 } 517 dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 518 if (!dsl_dataset_is_snapshot(ds)) { 519 (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--", 520 sizeof (drr->drr_u.drr_begin.drr_toname)); 521 } 522 523 dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); 524 525 dsp->dsa_drr = drr; 526 dsp->dsa_outfd = outfd; 527 dsp->dsa_proc = curproc; 528 dsp->dsa_td = curthread; 529 dsp->dsa_fp = fp; 530 dsp->dsa_os = os; 531 dsp->dsa_off = off; 532 dsp->dsa_toguid = ds->ds_phys->ds_guid; 533 ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); 534 dsp->dsa_pending_op = PENDING_NONE; 535 dsp->dsa_incremental = (fromzb != NULL); 536 537 mutex_enter(&ds->ds_sendstream_lock); 538 list_insert_head(&ds->ds_sendstreams, dsp); 539 mutex_exit(&ds->ds_sendstream_lock); 540 541 dsl_dataset_long_hold(ds, FTAG); 542 dsl_pool_rele(dp, tag); 543 544 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 545 err = dsp->dsa_err; 546 goto out; 547 } 548 549 err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 550 backup_cb, dsp); 551 552 if (dsp->dsa_pending_op != PENDING_NONE) 553 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) 554 err = SET_ERROR(EINTR); 555 556 if (err != 0) { 557 if (err == EINTR && dsp->dsa_err != 0) 558 err = dsp->dsa_err; 559 goto out; 560 } 561 562 bzero(drr, sizeof (dmu_replay_record_t)); 563 drr->drr_type = DRR_END; 564 drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; 565 drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; 566 567 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 568 err = dsp->dsa_err; 569 goto out; 570 } 571 572out: 573 mutex_enter(&ds->ds_sendstream_lock); 574 list_remove(&ds->ds_sendstreams, dsp); 575 mutex_exit(&ds->ds_sendstream_lock); 576 577 kmem_free(drr, sizeof (dmu_replay_record_t)); 578 kmem_free(dsp, 
sizeof (dmu_sendarg_t)); 579 580 dsl_dataset_long_rele(ds, FTAG); 581 582 return (err); 583} 584 585int 586dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, 587#ifdef illumos 588 int outfd, vnode_t *vp, offset_t *off) 589#else 590 int outfd, struct file *fp, offset_t *off) 591#endif 592{ 593 dsl_pool_t *dp; 594 dsl_dataset_t *ds; 595 dsl_dataset_t *fromds = NULL; 596 int err; 597 598 err = dsl_pool_hold(pool, FTAG, &dp); 599 if (err != 0) 600 return (err); 601 602 err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); 603 if (err != 0) { 604 dsl_pool_rele(dp, FTAG); 605 return (err); 606 } 607 608 if (fromsnap != 0) { 609 zfs_bookmark_phys_t zb; 610 boolean_t is_clone; 611 612 err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); 613 if (err != 0) { 614 dsl_dataset_rele(ds, FTAG); 615 dsl_pool_rele(dp, FTAG); 616 return (err); 617 } 618 if (!dsl_dataset_is_before(ds, fromds, 0)) 619 err = SET_ERROR(EXDEV); 620 zb.zbm_creation_time = fromds->ds_phys->ds_creation_time; 621 zb.zbm_creation_txg = fromds->ds_phys->ds_creation_txg; 622 zb.zbm_guid = fromds->ds_phys->ds_guid; 623 is_clone = (fromds->ds_dir != ds->ds_dir); 624 dsl_dataset_rele(fromds, FTAG); 625 err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, 626 outfd, fp, off); 627 } else { 628 err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, 629 outfd, fp, off); 630 } 631 dsl_dataset_rele(ds, FTAG); 632 return (err); 633} 634 635int 636dmu_send(const char *tosnap, const char *fromsnap, 637#ifdef illumos 638 int outfd, vnode_t *vp, offset_t *off) 639#else 640 int outfd, struct file *fp, offset_t *off) 641#endif 642{ 643 dsl_pool_t *dp; 644 dsl_dataset_t *ds; 645 int err; 646 boolean_t owned = B_FALSE; 647 648 if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) 649 return (SET_ERROR(EINVAL)); 650 651 err = dsl_pool_hold(tosnap, FTAG, &dp); 652 if (err != 0) 653 return (err); 654 655 if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) { 656 /* 657 * We are sending a filesystem or volume. Ensure 658 * that it doesn't change by owning the dataset. 659 */ 660 err = dsl_dataset_own(dp, tosnap, FTAG, &ds); 661 owned = B_TRUE; 662 } else { 663 err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); 664 } 665 if (err != 0) { 666 dsl_pool_rele(dp, FTAG); 667 return (err); 668 } 669 670 if (fromsnap != NULL) { 671 zfs_bookmark_phys_t zb; 672 boolean_t is_clone = B_FALSE; 673 int fsnamelen = strchr(tosnap, '@') - tosnap; 674 675 /* 676 * If the fromsnap is in a different filesystem, then 677 * mark the send stream as a clone. 
678 */ 679 if (strncmp(tosnap, fromsnap, fsnamelen) != 0 || 680 (fromsnap[fsnamelen] != '@' && 681 fromsnap[fsnamelen] != '#')) { 682 is_clone = B_TRUE; 683 } 684 685 if (strchr(fromsnap, '@')) { 686 dsl_dataset_t *fromds; 687 err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); 688 if (err == 0) { 689 if (!dsl_dataset_is_before(ds, fromds, 0)) 690 err = SET_ERROR(EXDEV); 691 zb.zbm_creation_time = 692 fromds->ds_phys->ds_creation_time; 693 zb.zbm_creation_txg = 694 fromds->ds_phys->ds_creation_txg; 695 zb.zbm_guid = fromds->ds_phys->ds_guid; 696 is_clone = (ds->ds_dir != fromds->ds_dir); 697 dsl_dataset_rele(fromds, FTAG); 698 } 699 } else { 700 err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); 701 } 702 if (err != 0) { 703 dsl_dataset_rele(ds, FTAG); 704 dsl_pool_rele(dp, FTAG); 705 return (err); 706 } 707 err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, 708 outfd, fp, off); 709 } else { 710 err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, 711 outfd, fp, off); 712 } 713 if (owned) 714 dsl_dataset_disown(ds, FTAG); 715 else 716 dsl_dataset_rele(ds, FTAG); 717 return (err); 718} 719 720int 721dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) 722{ 723 dsl_pool_t *dp = ds->ds_dir->dd_pool; 724 int err; 725 uint64_t size; 726 727 ASSERT(dsl_pool_config_held(dp)); 728 729 /* tosnap must be a snapshot */ 730 if (!dsl_dataset_is_snapshot(ds)) 731 return (SET_ERROR(EINVAL)); 732 733 /* 734 * fromsnap must be an earlier snapshot from the same fs as tosnap, 735 * or the origin's fs. 736 */ 737 if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0)) 738 return (SET_ERROR(EXDEV)); 739 740 /* Get uncompressed size estimate of changed data. */ 741 if (fromds == NULL) { 742 size = ds->ds_phys->ds_uncompressed_bytes; 743 } else { 744 uint64_t used, comp; 745 err = dsl_dataset_space_written(fromds, ds, 746 &used, &comp, &size); 747 if (err != 0) 748 return (err); 749 } 750 751 /* 752 * Assume that space (both on-disk and in-stream) is dominated by 753 * data. We will adjust for indirect blocks and the copies property, 754 * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). 755 */ 756 757 /* 758 * Subtract out approximate space used by indirect blocks. 759 * Assume most space is used by data blocks (non-indirect, non-dnode). 760 * Assume all blocks are recordsize. Assume ditto blocks and 761 * internal fragmentation counter out compression. 762 * 763 * Therefore, space used by indirect blocks is sizeof(blkptr_t) per 764 * block, which we observe in practice. 765 */ 766 uint64_t recordsize; 767 err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); 768 if (err != 0) 769 return (err); 770 size -= size / recordsize * sizeof (blkptr_t); 771 772 /* Add in the space for the record associated with each block. */ 773 size += size / recordsize * sizeof (dmu_replay_record_t); 774 775 *sizep = size; 776 777 return (0); 778} 779 780typedef struct dmu_recv_begin_arg { 781 const char *drba_origin; 782 dmu_recv_cookie_t *drba_cookie; 783 cred_t *drba_cred; 784 uint64_t drba_snapobj; 785} dmu_recv_begin_arg_t; 786 787static int 788recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, 789 uint64_t fromguid) 790{ 791 uint64_t val; 792 int error; 793 dsl_pool_t *dp = ds->ds_dir->dd_pool; 794 795 /* temporary clone name must not exist */ 796 error = zap_lookup(dp->dp_meta_objset, 797 ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, 798 8, 1, &val); 799 if (error != ENOENT) 800 return (error == 0 ? 
EBUSY : error); 801 802 /* new snapshot name must not exist */ 803 error = zap_lookup(dp->dp_meta_objset, 804 ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 805 8, 1, &val); 806 if (error != ENOENT) 807 return (error == 0 ? EEXIST : error); 808 809 if (fromguid != 0) { 810 dsl_dataset_t *snap; 811 uint64_t obj = ds->ds_phys->ds_prev_snap_obj; 812 813 /* Find snapshot in this dir that matches fromguid. */ 814 while (obj != 0) { 815 error = dsl_dataset_hold_obj(dp, obj, FTAG, 816 &snap); 817 if (error != 0) 818 return (SET_ERROR(ENODEV)); 819 if (snap->ds_dir != ds->ds_dir) { 820 dsl_dataset_rele(snap, FTAG); 821 return (SET_ERROR(ENODEV)); 822 } 823 if (snap->ds_phys->ds_guid == fromguid) 824 break; 825 obj = snap->ds_phys->ds_prev_snap_obj; 826 dsl_dataset_rele(snap, FTAG); 827 } 828 if (obj == 0) 829 return (SET_ERROR(ENODEV)); 830 831 if (drba->drba_cookie->drc_force) { 832 drba->drba_snapobj = obj; 833 } else { 834 /* 835 * If we are not forcing, there must be no 836 * changes since fromsnap. 837 */ 838 if (dsl_dataset_modified_since_snap(ds, snap)) { 839 dsl_dataset_rele(snap, FTAG); 840 return (SET_ERROR(ETXTBSY)); 841 } 842 drba->drba_snapobj = ds->ds_prev->ds_object; 843 } 844 845 dsl_dataset_rele(snap, FTAG); 846 } else { 847 /* if full, most recent snapshot must be $ORIGIN */ 848 if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) 849 return (SET_ERROR(ENODEV)); 850 drba->drba_snapobj = ds->ds_phys->ds_prev_snap_obj; 851 } 852 853 return (0); 854 855} 856 857static int 858dmu_recv_begin_check(void *arg, dmu_tx_t *tx) 859{ 860 dmu_recv_begin_arg_t *drba = arg; 861 dsl_pool_t *dp = dmu_tx_pool(tx); 862 struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 863 uint64_t fromguid = drrb->drr_fromguid; 864 int flags = drrb->drr_flags; 865 int error; 866 dsl_dataset_t *ds; 867 const char *tofs = drba->drba_cookie->drc_tofs; 868 869 /* already checked */ 870 ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 871 872 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == 873 DMU_COMPOUNDSTREAM || 874 drrb->drr_type >= DMU_OST_NUMTYPES || 875 ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) 876 return (SET_ERROR(EINVAL)); 877 878 /* Verify pool version supports SA if SA_SPILL feature set */ 879 if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & 880 DMU_BACKUP_FEATURE_SA_SPILL) && 881 spa_version(dp->dp_spa) < SPA_VERSION_SA) { 882 return (SET_ERROR(ENOTSUP)); 883 } 884 885 error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 886 if (error == 0) { 887 /* target fs already exists; recv into temp clone */ 888 889 /* Can't recv a clone into an existing fs */ 890 if (flags & DRR_FLAG_CLONE) { 891 dsl_dataset_rele(ds, FTAG); 892 return (SET_ERROR(EINVAL)); 893 } 894 895 error = recv_begin_check_existing_impl(drba, ds, fromguid); 896 dsl_dataset_rele(ds, FTAG); 897 } else if (error == ENOENT) { 898 /* target fs does not exist; must be a full backup or clone */ 899 char buf[MAXNAMELEN]; 900 901 /* 902 * If it's a non-clone incremental, we are missing the 903 * target fs, so fail the recv. 
904 */ 905 if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) 906 return (SET_ERROR(ENOENT)); 907 908 /* Open the parent of tofs */ 909 ASSERT3U(strlen(tofs), <, MAXNAMELEN); 910 (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); 911 error = dsl_dataset_hold(dp, buf, FTAG, &ds); 912 if (error != 0) 913 return (error); 914 915 if (drba->drba_origin != NULL) { 916 dsl_dataset_t *origin; 917 error = dsl_dataset_hold(dp, drba->drba_origin, 918 FTAG, &origin); 919 if (error != 0) { 920 dsl_dataset_rele(ds, FTAG); 921 return (error); 922 } 923 if (!dsl_dataset_is_snapshot(origin)) { 924 dsl_dataset_rele(origin, FTAG); 925 dsl_dataset_rele(ds, FTAG); 926 return (SET_ERROR(EINVAL)); 927 } 928 if (origin->ds_phys->ds_guid != fromguid) { 929 dsl_dataset_rele(origin, FTAG); 930 dsl_dataset_rele(ds, FTAG); 931 return (SET_ERROR(ENODEV)); 932 } 933 dsl_dataset_rele(origin, FTAG); 934 } 935 dsl_dataset_rele(ds, FTAG); 936 error = 0; 937 } 938 return (error); 939} 940 941static void 942dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 943{ 944 dmu_recv_begin_arg_t *drba = arg; 945 dsl_pool_t *dp = dmu_tx_pool(tx); 946 struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 947 const char *tofs = drba->drba_cookie->drc_tofs; 948 dsl_dataset_t *ds, *newds; 949 uint64_t dsobj; 950 int error; 951 uint64_t crflags; 952 953 crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? 954 DS_FLAG_CI_DATASET : 0; 955 956 error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 957 if (error == 0) { 958 /* create temporary clone */ 959 dsl_dataset_t *snap = NULL; 960 if (drba->drba_snapobj != 0) { 961 VERIFY0(dsl_dataset_hold_obj(dp, 962 drba->drba_snapobj, FTAG, &snap)); 963 } 964 dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, 965 snap, crflags, drba->drba_cred, tx); 966 dsl_dataset_rele(snap, FTAG); 967 dsl_dataset_rele(ds, FTAG); 968 } else { 969 dsl_dir_t *dd; 970 const char *tail; 971 dsl_dataset_t *origin = NULL; 972 973 VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); 974 975 if (drba->drba_origin != NULL) { 976 VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, 977 FTAG, &origin)); 978 } 979 980 /* Create new dataset. */ 981 dsobj = dsl_dataset_create_sync(dd, 982 strrchr(tofs, '/') + 1, 983 origin, crflags, drba->drba_cred, tx); 984 if (origin != NULL) 985 dsl_dataset_rele(origin, FTAG); 986 dsl_dir_rele(dd, FTAG); 987 drba->drba_cookie->drc_newfs = B_TRUE; 988 } 989 VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); 990 991 dmu_buf_will_dirty(newds->ds_dbuf, tx); 992 newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 993 994 /* 995 * If we actually created a non-clone, we need to create the 996 * objset in our new dataset. 997 */ 998 if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { 999 (void) dmu_objset_create_impl(dp->dp_spa, 1000 newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); 1001 } 1002 1003 drba->drba_cookie->drc_ds = newds; 1004 1005 spa_history_log_internal_ds(newds, "receive", tx, ""); 1006} 1007 1008/* 1009 * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 1010 * succeeds; otherwise we will leak the holds on the datasets. 
1011 */ 1012int 1013dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, 1014 boolean_t force, char *origin, dmu_recv_cookie_t *drc) 1015{ 1016 dmu_recv_begin_arg_t drba = { 0 }; 1017 dmu_replay_record_t *drr; 1018 1019 bzero(drc, sizeof (dmu_recv_cookie_t)); 1020 drc->drc_drrb = drrb; 1021 drc->drc_tosnap = tosnap; 1022 drc->drc_tofs = tofs; 1023 drc->drc_force = force; 1024 1025 if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 1026 drc->drc_byteswap = B_TRUE; 1027 else if (drrb->drr_magic != DMU_BACKUP_MAGIC) 1028 return (SET_ERROR(EINVAL)); 1029 1030 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 1031 drr->drr_type = DRR_BEGIN; 1032 drr->drr_u.drr_begin = *drc->drc_drrb; 1033 if (drc->drc_byteswap) { 1034 fletcher_4_incremental_byteswap(drr, 1035 sizeof (dmu_replay_record_t), &drc->drc_cksum); 1036 } else { 1037 fletcher_4_incremental_native(drr, 1038 sizeof (dmu_replay_record_t), &drc->drc_cksum); 1039 } 1040 kmem_free(drr, sizeof (dmu_replay_record_t)); 1041 1042 if (drc->drc_byteswap) { 1043 drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1044 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 1045 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1046 drrb->drr_type = BSWAP_32(drrb->drr_type); 1047 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1048 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1049 } 1050 1051 drba.drba_origin = origin; 1052 drba.drba_cookie = drc; 1053 drba.drba_cred = CRED(); 1054 1055 return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, 1056 &drba, 5)); 1057} 1058 1059struct restorearg { 1060 int err; 1061 boolean_t byteswap; 1062 kthread_t *td; 1063 struct file *fp; 1064 char *buf; 1065 uint64_t voff; 1066 int bufsize; /* amount of memory allocated for buf */ 1067 zio_cksum_t cksum; 1068 avl_tree_t *guid_to_ds_map; 1069}; 1070 1071typedef struct guid_map_entry { 1072 uint64_t guid; 1073 dsl_dataset_t *gme_ds; 1074 avl_node_t avlnode; 1075} guid_map_entry_t; 1076 1077static int 1078guid_compare(const void *arg1, const void *arg2) 1079{ 1080 const guid_map_entry_t *gmep1 = arg1; 1081 const guid_map_entry_t *gmep2 = arg2; 1082 1083 if (gmep1->guid < gmep2->guid) 1084 return (-1); 1085 else if (gmep1->guid > gmep2->guid) 1086 return (1); 1087 return (0); 1088} 1089 1090static void 1091free_guid_map_onexit(void *arg) 1092{ 1093 avl_tree_t *ca = arg; 1094 void *cookie = NULL; 1095 guid_map_entry_t *gmep; 1096 1097 while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { 1098 dsl_dataset_long_rele(gmep->gme_ds, gmep); 1099 dsl_dataset_rele(gmep->gme_ds, gmep); 1100 kmem_free(gmep, sizeof (guid_map_entry_t)); 1101 } 1102 avl_destroy(ca); 1103 kmem_free(ca, sizeof (avl_tree_t)); 1104} 1105 1106static int 1107restore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid) 1108{ 1109 struct uio auio; 1110 struct iovec aiov; 1111 int error; 1112 1113 aiov.iov_base = buf; 1114 aiov.iov_len = len; 1115 auio.uio_iov = &aiov; 1116 auio.uio_iovcnt = 1; 1117 auio.uio_resid = len; 1118 auio.uio_segflg = UIO_SYSSPACE; 1119 auio.uio_rw = UIO_READ; 1120 auio.uio_offset = off; 1121 auio.uio_td = ra->td; 1122#ifdef _KERNEL 1123 error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 1124#else 1125 fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 1126 error = EOPNOTSUPP; 1127#endif 1128 *resid = auio.uio_resid; 1129 return (error); 1130} 1131 1132static void * 1133restore_read(struct restorearg *ra, int len) 1134{ 1135 void *rv; 1136 int done = 0; 1137 1138 /* some things will 
require 8-byte alignment, so everything must */ 1139 ASSERT0(len % 8); 1140 1141 while (done < len) { 1142 ssize_t resid; 1143 1144 ra->err = restore_bytes(ra, (caddr_t)ra->buf + done, 1145 len - done, ra->voff, &resid); 1146 1147 if (resid == len - done) 1148 ra->err = SET_ERROR(EINVAL); 1149 ra->voff += len - done - resid; 1150 done = len - resid; 1151 if (ra->err != 0) 1152 return (NULL); 1153 } 1154 1155 ASSERT3U(done, ==, len); 1156 rv = ra->buf; 1157 if (ra->byteswap) 1158 fletcher_4_incremental_byteswap(rv, len, &ra->cksum); 1159 else 1160 fletcher_4_incremental_native(rv, len, &ra->cksum); 1161 return (rv); 1162} 1163 1164static void 1165backup_byteswap(dmu_replay_record_t *drr) 1166{ 1167#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 1168#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 1169 drr->drr_type = BSWAP_32(drr->drr_type); 1170 drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 1171 switch (drr->drr_type) { 1172 case DRR_BEGIN: 1173 DO64(drr_begin.drr_magic); 1174 DO64(drr_begin.drr_versioninfo); 1175 DO64(drr_begin.drr_creation_time); 1176 DO32(drr_begin.drr_type); 1177 DO32(drr_begin.drr_flags); 1178 DO64(drr_begin.drr_toguid); 1179 DO64(drr_begin.drr_fromguid); 1180 break; 1181 case DRR_OBJECT: 1182 DO64(drr_object.drr_object); 1183 /* DO64(drr_object.drr_allocation_txg); */ 1184 DO32(drr_object.drr_type); 1185 DO32(drr_object.drr_bonustype); 1186 DO32(drr_object.drr_blksz); 1187 DO32(drr_object.drr_bonuslen); 1188 DO64(drr_object.drr_toguid); 1189 break; 1190 case DRR_FREEOBJECTS: 1191 DO64(drr_freeobjects.drr_firstobj); 1192 DO64(drr_freeobjects.drr_numobjs); 1193 DO64(drr_freeobjects.drr_toguid); 1194 break; 1195 case DRR_WRITE: 1196 DO64(drr_write.drr_object); 1197 DO32(drr_write.drr_type); 1198 DO64(drr_write.drr_offset); 1199 DO64(drr_write.drr_length); 1200 DO64(drr_write.drr_toguid); 1201 DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); 1202 DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); 1203 DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); 1204 DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); 1205 DO64(drr_write.drr_key.ddk_prop); 1206 break; 1207 case DRR_WRITE_BYREF: 1208 DO64(drr_write_byref.drr_object); 1209 DO64(drr_write_byref.drr_offset); 1210 DO64(drr_write_byref.drr_length); 1211 DO64(drr_write_byref.drr_toguid); 1212 DO64(drr_write_byref.drr_refguid); 1213 DO64(drr_write_byref.drr_refobject); 1214 DO64(drr_write_byref.drr_refoffset); 1215 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); 1216 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); 1217 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); 1218 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); 1219 DO64(drr_write_byref.drr_key.ddk_prop); 1220 break; 1221 case DRR_FREE: 1222 DO64(drr_free.drr_object); 1223 DO64(drr_free.drr_offset); 1224 DO64(drr_free.drr_length); 1225 DO64(drr_free.drr_toguid); 1226 break; 1227 case DRR_SPILL: 1228 DO64(drr_spill.drr_object); 1229 DO64(drr_spill.drr_length); 1230 DO64(drr_spill.drr_toguid); 1231 break; 1232 case DRR_END: 1233 DO64(drr_end.drr_checksum.zc_word[0]); 1234 DO64(drr_end.drr_checksum.zc_word[1]); 1235 DO64(drr_end.drr_checksum.zc_word[2]); 1236 DO64(drr_end.drr_checksum.zc_word[3]); 1237 DO64(drr_end.drr_toguid); 1238 break; 1239 } 1240#undef DO64 1241#undef DO32 1242} 1243 1244static int 1245restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1246{ 1247 int err; 1248 dmu_tx_t *tx; 1249 void *data = NULL; 1250 1251 if (drro->drr_type == DMU_OT_NONE || 1252 !DMU_OT_IS_VALID(drro->drr_type) || 1253 
!DMU_OT_IS_VALID(drro->drr_bonustype) || 1254 drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 1255 drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1256 P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1257 drro->drr_blksz < SPA_MINBLOCKSIZE || 1258 drro->drr_blksz > SPA_MAXBLOCKSIZE || 1259 drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1260 return (SET_ERROR(EINVAL)); 1261 } 1262 1263 err = dmu_object_info(os, drro->drr_object, NULL); 1264 1265 if (err != 0 && err != ENOENT) 1266 return (SET_ERROR(EINVAL)); 1267 1268 if (drro->drr_bonuslen) { 1269 data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); 1270 if (ra->err != 0) 1271 return (ra->err); 1272 } 1273 1274 if (err == ENOENT) { 1275 /* currently free, want to be allocated */ 1276 tx = dmu_tx_create(os); 1277 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1278 err = dmu_tx_assign(tx, TXG_WAIT); 1279 if (err != 0) { 1280 dmu_tx_abort(tx); 1281 return (err); 1282 } 1283 err = dmu_object_claim(os, drro->drr_object, 1284 drro->drr_type, drro->drr_blksz, 1285 drro->drr_bonustype, drro->drr_bonuslen, tx); 1286 dmu_tx_commit(tx); 1287 } else { 1288 /* currently allocated, want to be allocated */ 1289 err = dmu_object_reclaim(os, drro->drr_object, 1290 drro->drr_type, drro->drr_blksz, 1291 drro->drr_bonustype, drro->drr_bonuslen); 1292 } 1293 if (err != 0) { 1294 return (SET_ERROR(EINVAL)); 1295 } 1296 1297 tx = dmu_tx_create(os); 1298 dmu_tx_hold_bonus(tx, drro->drr_object); 1299 err = dmu_tx_assign(tx, TXG_WAIT); 1300 if (err != 0) { 1301 dmu_tx_abort(tx); 1302 return (err); 1303 } 1304 1305 dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 1306 tx); 1307 dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1308 1309 if (data != NULL) { 1310 dmu_buf_t *db; 1311 1312 VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1313 dmu_buf_will_dirty(db, tx); 1314 1315 ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 1316 bcopy(data, db->db_data, drro->drr_bonuslen); 1317 if (ra->byteswap) { 1318 dmu_object_byteswap_t byteswap = 1319 DMU_OT_BYTESWAP(drro->drr_bonustype); 1320 dmu_ot_byteswap[byteswap].ob_func(db->db_data, 1321 drro->drr_bonuslen); 1322 } 1323 dmu_buf_rele(db, FTAG); 1324 } 1325 dmu_tx_commit(tx); 1326 return (0); 1327} 1328 1329/* ARGSUSED */ 1330static int 1331restore_freeobjects(struct restorearg *ra, objset_t *os, 1332 struct drr_freeobjects *drrfo) 1333{ 1334 uint64_t obj; 1335 1336 if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1337 return (SET_ERROR(EINVAL)); 1338 1339 for (obj = drrfo->drr_firstobj; 1340 obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 1341 (void) dmu_object_next(os, &obj, FALSE, 0)) { 1342 int err; 1343 1344 if (dmu_object_info(os, obj, NULL) != 0) 1345 continue; 1346 1347 err = dmu_free_long_object(os, obj); 1348 if (err != 0) 1349 return (err); 1350 } 1351 return (0); 1352} 1353 1354static int 1355restore_write(struct restorearg *ra, objset_t *os, 1356 struct drr_write *drrw) 1357{ 1358 dmu_tx_t *tx; 1359 void *data; 1360 int err; 1361 1362 if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1363 !DMU_OT_IS_VALID(drrw->drr_type)) 1364 return (SET_ERROR(EINVAL)); 1365 1366 data = restore_read(ra, drrw->drr_length); 1367 if (data == NULL) 1368 return (ra->err); 1369 1370 if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1371 return (SET_ERROR(EINVAL)); 1372 1373 tx = dmu_tx_create(os); 1374 1375 dmu_tx_hold_write(tx, drrw->drr_object, 1376 drrw->drr_offset, drrw->drr_length); 1377 err = dmu_tx_assign(tx, TXG_WAIT); 1378 if (err != 0) { 
1379 dmu_tx_abort(tx); 1380 return (err); 1381 } 1382 if (ra->byteswap) { 1383 dmu_object_byteswap_t byteswap = 1384 DMU_OT_BYTESWAP(drrw->drr_type); 1385 dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); 1386 } 1387 dmu_write(os, drrw->drr_object, 1388 drrw->drr_offset, drrw->drr_length, data, tx); 1389 dmu_tx_commit(tx); 1390 return (0); 1391} 1392 1393/* 1394 * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 1395 * streams to refer to a copy of the data that is already on the 1396 * system because it came in earlier in the stream. This function 1397 * finds the earlier copy of the data, and uses that copy instead of 1398 * data from the stream to fulfill this write. 1399 */ 1400static int 1401restore_write_byref(struct restorearg *ra, objset_t *os, 1402 struct drr_write_byref *drrwbr) 1403{ 1404 dmu_tx_t *tx; 1405 int err; 1406 guid_map_entry_t gmesrch; 1407 guid_map_entry_t *gmep; 1408 avl_index_t where; 1409 objset_t *ref_os = NULL; 1410 dmu_buf_t *dbp; 1411 1412 if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 1413 return (SET_ERROR(EINVAL)); 1414 1415 /* 1416 * If the GUID of the referenced dataset is different from the 1417 * GUID of the target dataset, find the referenced dataset. 1418 */ 1419 if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 1420 gmesrch.guid = drrwbr->drr_refguid; 1421 if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, 1422 &where)) == NULL) { 1423 return (SET_ERROR(EINVAL)); 1424 } 1425 if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 1426 return (SET_ERROR(EINVAL)); 1427 } else { 1428 ref_os = os; 1429 } 1430 1431 if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 1432 drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH)) 1433 return (err); 1434 1435 tx = dmu_tx_create(os); 1436 1437 dmu_tx_hold_write(tx, drrwbr->drr_object, 1438 drrwbr->drr_offset, drrwbr->drr_length); 1439 err = dmu_tx_assign(tx, TXG_WAIT); 1440 if (err != 0) { 1441 dmu_tx_abort(tx); 1442 return (err); 1443 } 1444 dmu_write(os, drrwbr->drr_object, 1445 drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 1446 dmu_buf_rele(dbp, FTAG); 1447 dmu_tx_commit(tx); 1448 return (0); 1449} 1450 1451static int 1452restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) 1453{ 1454 dmu_tx_t *tx; 1455 void *data; 1456 dmu_buf_t *db, *db_spill; 1457 int err; 1458 1459 if (drrs->drr_length < SPA_MINBLOCKSIZE || 1460 drrs->drr_length > SPA_MAXBLOCKSIZE) 1461 return (SET_ERROR(EINVAL)); 1462 1463 data = restore_read(ra, drrs->drr_length); 1464 if (data == NULL) 1465 return (ra->err); 1466 1467 if (dmu_object_info(os, drrs->drr_object, NULL) != 0) 1468 return (SET_ERROR(EINVAL)); 1469 1470 VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); 1471 if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { 1472 dmu_buf_rele(db, FTAG); 1473 return (err); 1474 } 1475 1476 tx = dmu_tx_create(os); 1477 1478 dmu_tx_hold_spill(tx, db->db_object); 1479 1480 err = dmu_tx_assign(tx, TXG_WAIT); 1481 if (err != 0) { 1482 dmu_buf_rele(db, FTAG); 1483 dmu_buf_rele(db_spill, FTAG); 1484 dmu_tx_abort(tx); 1485 return (err); 1486 } 1487 dmu_buf_will_dirty(db_spill, tx); 1488 1489 if (db_spill->db_size < drrs->drr_length) 1490 VERIFY(0 == dbuf_spill_set_blksz(db_spill, 1491 drrs->drr_length, tx)); 1492 bcopy(data, db_spill->db_data, drrs->drr_length); 1493 1494 dmu_buf_rele(db, FTAG); 1495 dmu_buf_rele(db_spill, FTAG); 1496 1497 dmu_tx_commit(tx); 1498 return (0); 1499} 1500 1501/* ARGSUSED */ 1502static int 1503restore_free(struct restorearg 
*ra, objset_t *os, 1504 struct drr_free *drrf) 1505{ 1506 int err; 1507 1508 if (drrf->drr_length != -1ULL && 1509 drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1510 return (SET_ERROR(EINVAL)); 1511 1512 if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1513 return (SET_ERROR(EINVAL)); 1514 1515 err = dmu_free_long_range(os, drrf->drr_object, 1516 drrf->drr_offset, drrf->drr_length); 1517 return (err); 1518} 1519 1520/* used to destroy the drc_ds on error */ 1521static void 1522dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) 1523{ 1524 char name[MAXNAMELEN]; 1525 dsl_dataset_name(drc->drc_ds, name); 1526 dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1527 (void) dsl_destroy_head(name); 1528} 1529 1530/* 1531 * NB: callers *must* call dmu_recv_end() if this succeeds. 1532 */ 1533int 1534dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, 1535 int cleanup_fd, uint64_t *action_handlep) 1536{ 1537 struct restorearg ra = { 0 }; 1538 dmu_replay_record_t *drr; 1539 objset_t *os; 1540 zio_cksum_t pcksum; 1541 int featureflags; 1542 1543 ra.byteswap = drc->drc_byteswap; 1544 ra.cksum = drc->drc_cksum; 1545 ra.td = curthread; 1546 ra.fp = fp; 1547 ra.voff = *voffp; 1548 ra.bufsize = 1<<20; 1549 ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1550 1551 /* these were verified in dmu_recv_begin */ 1552 ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, 1553 DMU_SUBSTREAM); 1554 ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); 1555 1556 /* 1557 * Open the objset we are modifying. 1558 */ 1559 VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); 1560 1561 ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 1562 1563 featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 1564 1565 /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 1566 if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 1567 minor_t minor; 1568 1569 if (cleanup_fd == -1) { 1570 ra.err = SET_ERROR(EBADF); 1571 goto out; 1572 } 1573 ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); 1574 if (ra.err != 0) { 1575 cleanup_fd = -1; 1576 goto out; 1577 } 1578 1579 if (*action_handlep == 0) { 1580 ra.guid_to_ds_map = 1581 kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 1582 avl_create(ra.guid_to_ds_map, guid_compare, 1583 sizeof (guid_map_entry_t), 1584 offsetof(guid_map_entry_t, avlnode)); 1585 ra.err = zfs_onexit_add_cb(minor, 1586 free_guid_map_onexit, ra.guid_to_ds_map, 1587 action_handlep); 1588 if (ra.err != 0) 1589 goto out; 1590 } else { 1591 ra.err = zfs_onexit_cb_data(minor, *action_handlep, 1592 (void **)&ra.guid_to_ds_map); 1593 if (ra.err != 0) 1594 goto out; 1595 } 1596 1597 drc->drc_guid_to_ds_map = ra.guid_to_ds_map; 1598 } 1599 1600 /* 1601 * Read records and process them. 1602 */ 1603 pcksum = ra.cksum; 1604 while (ra.err == 0 && 1605 NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1606 if (issig(JUSTLOOKING) && issig(FORREAL)) { 1607 ra.err = SET_ERROR(EINTR); 1608 goto out; 1609 } 1610 1611 if (ra.byteswap) 1612 backup_byteswap(drr); 1613 1614 switch (drr->drr_type) { 1615 case DRR_OBJECT: 1616 { 1617 /* 1618 * We need to make a copy of the record header, 1619 * because restore_{object,write} may need to 1620 * restore_read(), which will invalidate drr. 
1621 */ 1622 struct drr_object drro = drr->drr_u.drr_object; 1623 ra.err = restore_object(&ra, os, &drro); 1624 break; 1625 } 1626 case DRR_FREEOBJECTS: 1627 { 1628 struct drr_freeobjects drrfo = 1629 drr->drr_u.drr_freeobjects; 1630 ra.err = restore_freeobjects(&ra, os, &drrfo); 1631 break; 1632 } 1633 case DRR_WRITE: 1634 { 1635 struct drr_write drrw = drr->drr_u.drr_write; 1636 ra.err = restore_write(&ra, os, &drrw); 1637 break; 1638 } 1639 case DRR_WRITE_BYREF: 1640 { 1641 struct drr_write_byref drrwbr = 1642 drr->drr_u.drr_write_byref; 1643 ra.err = restore_write_byref(&ra, os, &drrwbr); 1644 break; 1645 } 1646 case DRR_FREE: 1647 { 1648 struct drr_free drrf = drr->drr_u.drr_free; 1649 ra.err = restore_free(&ra, os, &drrf); 1650 break; 1651 } 1652 case DRR_END: 1653 { 1654 struct drr_end drre = drr->drr_u.drr_end; 1655 /* 1656 * We compare against the *previous* checksum 1657 * value, because the stored checksum is of 1658 * everything before the DRR_END record. 1659 */ 1660 if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1661 ra.err = SET_ERROR(ECKSUM); 1662 goto out; 1663 } 1664 case DRR_SPILL: 1665 { 1666 struct drr_spill drrs = drr->drr_u.drr_spill; 1667 ra.err = restore_spill(&ra, os, &drrs); 1668 break; 1669 } 1670 default: 1671 ra.err = SET_ERROR(EINVAL); 1672 goto out; 1673 } 1674 pcksum = ra.cksum; 1675 } 1676 ASSERT(ra.err != 0); 1677 1678out: 1679 if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) 1680 zfs_onexit_fd_rele(cleanup_fd); 1681 1682 if (ra.err != 0) { 1683 /* 1684 * destroy what we created, so we don't leave it in the 1685 * inconsistent restoring state. 1686 */ 1687 dmu_recv_cleanup_ds(drc); 1688 } 1689 1690 kmem_free(ra.buf, ra.bufsize); 1691 *voffp = ra.voff; 1692 return (ra.err); 1693} 1694 1695static int 1696dmu_recv_end_check(void *arg, dmu_tx_t *tx) 1697{ 1698 dmu_recv_cookie_t *drc = arg; 1699 dsl_pool_t *dp = dmu_tx_pool(tx); 1700 int error; 1701 1702 ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); 1703 1704 if (!drc->drc_newfs) { 1705 dsl_dataset_t *origin_head; 1706 1707 error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); 1708 if (error != 0) 1709 return (error); 1710 if (drc->drc_force) { 1711 /* 1712 * We will destroy any snapshots in tofs (i.e. before 1713 * origin_head) that are after the origin (which is 1714 * the snap before drc_ds, because drc_ds can not 1715 * have any snaps of its own). 
1716 */ 1717 uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1718 while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1719 dsl_dataset_t *snap; 1720 error = dsl_dataset_hold_obj(dp, obj, FTAG, 1721 &snap); 1722 if (error != 0) 1723 return (error); 1724 if (snap->ds_dir != origin_head->ds_dir) 1725 error = SET_ERROR(EINVAL); 1726 if (error == 0) { 1727 error = dsl_destroy_snapshot_check_impl( 1728 snap, B_FALSE); 1729 } 1730 obj = snap->ds_phys->ds_prev_snap_obj; 1731 dsl_dataset_rele(snap, FTAG); 1732 if (error != 0) 1733 return (error); 1734 } 1735 } 1736 error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, 1737 origin_head, drc->drc_force, drc->drc_owner, tx); 1738 if (error != 0) { 1739 dsl_dataset_rele(origin_head, FTAG); 1740 return (error); 1741 } 1742 error = dsl_dataset_snapshot_check_impl(origin_head, 1743 drc->drc_tosnap, tx, B_TRUE); 1744 dsl_dataset_rele(origin_head, FTAG); 1745 if (error != 0) 1746 return (error); 1747 1748 error = dsl_destroy_head_check_impl(drc->drc_ds, 1); 1749 } else { 1750 error = dsl_dataset_snapshot_check_impl(drc->drc_ds, 1751 drc->drc_tosnap, tx, B_TRUE); 1752 } 1753 return (error); 1754} 1755 1756static void 1757dmu_recv_end_sync(void *arg, dmu_tx_t *tx) 1758{ 1759 dmu_recv_cookie_t *drc = arg; 1760 dsl_pool_t *dp = dmu_tx_pool(tx); 1761 1762 spa_history_log_internal_ds(drc->drc_ds, "finish receiving", 1763 tx, "snap=%s", drc->drc_tosnap); 1764 1765 if (!drc->drc_newfs) { 1766 dsl_dataset_t *origin_head; 1767 1768 VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, 1769 &origin_head)); 1770 1771 if (drc->drc_force) { 1772 /* 1773 * Destroy any snapshots of drc_tofs (origin_head) 1774 * after the origin (the snap before drc_ds). 1775 */ 1776 uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1777 while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1778 dsl_dataset_t *snap; 1779 VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, 1780 &snap)); 1781 ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir); 1782 obj = snap->ds_phys->ds_prev_snap_obj; 1783 dsl_destroy_snapshot_sync_impl(snap, 1784 B_FALSE, tx); 1785 dsl_dataset_rele(snap, FTAG); 1786 } 1787 } 1788 VERIFY3P(drc->drc_ds->ds_prev, ==, 1789 origin_head->ds_prev); 1790 1791 dsl_dataset_clone_swap_sync_impl(drc->drc_ds, 1792 origin_head, tx); 1793 dsl_dataset_snapshot_sync_impl(origin_head, 1794 drc->drc_tosnap, tx); 1795 1796 /* set snapshot's creation time and guid */ 1797 dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); 1798 origin_head->ds_prev->ds_phys->ds_creation_time = 1799 drc->drc_drrb->drr_creation_time; 1800 origin_head->ds_prev->ds_phys->ds_guid = 1801 drc->drc_drrb->drr_toguid; 1802 origin_head->ds_prev->ds_phys->ds_flags &= 1803 ~DS_FLAG_INCONSISTENT; 1804 1805 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 1806 origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1807 1808 dsl_dataset_rele(origin_head, FTAG); 1809 dsl_destroy_head_sync_impl(drc->drc_ds, tx); 1810 1811 if (drc->drc_owner != NULL) 1812 VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner); 1813 } else { 1814 dsl_dataset_t *ds = drc->drc_ds; 1815 1816 dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); 1817 1818 /* set snapshot's creation time and guid */ 1819 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1820 ds->ds_prev->ds_phys->ds_creation_time = 1821 drc->drc_drrb->drr_creation_time; 1822 ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; 1823 ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1824 1825 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1826 ds->ds_phys->ds_flags &= 
~DS_FLAG_INCONSISTENT; 1827 } 1828 drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; 1829 /* 1830 * Release the hold from dmu_recv_begin. This must be done before 1831 * we return to open context, so that when we free the dataset's dnode, 1832 * we can evict its bonus buffer. 1833 */ 1834 dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1835 drc->drc_ds = NULL; 1836} 1837 1838static int 1839add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) 1840{ 1841 dsl_pool_t *dp; 1842 dsl_dataset_t *snapds; 1843 guid_map_entry_t *gmep; 1844 int err; 1845 1846 ASSERT(guid_map != NULL); 1847 1848 err = dsl_pool_hold(name, FTAG, &dp); 1849 if (err != 0) 1850 return (err); 1851 gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); 1852 err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); 1853 if (err == 0) { 1854 gmep->guid = snapds->ds_phys->ds_guid; 1855 gmep->gme_ds = snapds; 1856 avl_add(guid_map, gmep); 1857 dsl_dataset_long_hold(snapds, gmep); 1858 } else 1859 kmem_free(gmep, sizeof (*gmep)); 1860 1861 dsl_pool_rele(dp, FTAG); 1862 return (err); 1863} 1864 1865static int dmu_recv_end_modified_blocks = 3; 1866 1867static int 1868dmu_recv_existing_end(dmu_recv_cookie_t *drc) 1869{ 1870 int error; 1871 char name[MAXNAMELEN]; 1872 1873#ifdef _KERNEL 1874 /* 1875 * We will be destroying the ds; make sure its origin is unmounted if 1876 * necessary. 1877 */ 1878 dsl_dataset_name(drc->drc_ds, name); 1879 zfs_destroy_unmount_origin(name); 1880#endif 1881 1882 error = dsl_sync_task(drc->drc_tofs, 1883 dmu_recv_end_check, dmu_recv_end_sync, drc, 1884 dmu_recv_end_modified_blocks); 1885 1886 if (error != 0) 1887 dmu_recv_cleanup_ds(drc); 1888 return (error); 1889} 1890 1891static int 1892dmu_recv_new_end(dmu_recv_cookie_t *drc) 1893{ 1894 int error; 1895 1896 error = dsl_sync_task(drc->drc_tofs, 1897 dmu_recv_end_check, dmu_recv_end_sync, drc, 1898 dmu_recv_end_modified_blocks); 1899 1900 if (error != 0) { 1901 dmu_recv_cleanup_ds(drc); 1902 } else if (drc->drc_guid_to_ds_map != NULL) { 1903 (void) add_ds_to_guidmap(drc->drc_tofs, 1904 drc->drc_guid_to_ds_map, 1905 drc->drc_newsnapobj); 1906 } 1907 return (error); 1908} 1909 1910int 1911dmu_recv_end(dmu_recv_cookie_t *drc, void *owner) 1912{ 1913 drc->drc_owner = owner; 1914 1915 if (drc->drc_newfs) 1916 return (dmu_recv_new_end(drc)); 1917 else 1918 return (dmu_recv_existing_end(drc)); 1919} 1920 1921/* 1922 * Return TRUE if this objset is currently being received into. 1923 */ 1924boolean_t 1925dmu_objset_is_receiving(objset_t *os) 1926{ 1927 return (os->os_dsl_dataset != NULL && 1928 os->os_dsl_dataset->ds_owner == dmu_recv_tag); 1929} 1930