1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23221263Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24268657Sdelphij * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 25265744Sdelphij * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26235222Smm * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved. 27221263Smm */ 28168404Spjd 29168404Spjd#include <sys/dmu.h> 30168404Spjd#include <sys/dmu_impl.h> 31168404Spjd#include <sys/dmu_tx.h> 32168404Spjd#include <sys/dbuf.h> 33168404Spjd#include <sys/dnode.h> 34168404Spjd#include <sys/zfs_context.h> 35168404Spjd#include <sys/dmu_objset.h> 36168404Spjd#include <sys/dmu_traverse.h> 37168404Spjd#include <sys/dsl_dataset.h> 38168404Spjd#include <sys/dsl_dir.h> 39219089Spjd#include <sys/dsl_prop.h> 40168404Spjd#include <sys/dsl_pool.h> 41168404Spjd#include <sys/dsl_synctask.h> 42168404Spjd#include <sys/zfs_ioctl.h> 43168404Spjd#include <sys/zap.h> 44168404Spjd#include <sys/zio_checksum.h> 45219089Spjd#include <sys/zfs_znode.h> 46219089Spjd#include <zfs_fletcher.h> 47219089Spjd#include <sys/avl.h> 48219089Spjd#include <sys/ddt.h> 49219089Spjd#include <sys/zfs_onexit.h> 50248571Smm#include <sys/dmu_send.h> 51248571Smm#include <sys/dsl_destroy.h> 52268649Sdelphij#include <sys/blkptr.h> 53263407Sdelphij#include <sys/dsl_bookmark.h> 54268649Sdelphij#include <sys/zfeature.h> 55168404Spjd 56268649Sdelphij#ifdef __FreeBSD__ 57268649Sdelphij#undef dump_write 58268649Sdelphij#define dump_write dmu_dump_write 59268649Sdelphij#endif 60268649Sdelphij 61228103Smm/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ 62228103Smmint zfs_send_corrupt_data = B_FALSE; 63228103Smm 64185029Spjdstatic char *dmu_recv_tag = "dmu_recv_tag"; 65248571Smmstatic const char *recv_clone_name = "%recv"; 66185029Spjd 67168404Spjdstatic int 68235222Smmdump_bytes(dmu_sendarg_t *dsp, void *buf, int len) 69168404Spjd{ 70235222Smm dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; 71168404Spjd struct uio auio; 72168404Spjd struct iovec aiov; 73240415Smm ASSERT0(len % 8); 74168404Spjd 75235222Smm fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); 76168404Spjd aiov.iov_base = buf; 77168404Spjd aiov.iov_len = len; 78168404Spjd auio.uio_iov = &aiov; 79168404Spjd auio.uio_iovcnt = 1; 80168404Spjd auio.uio_resid = len; 81169170Spjd auio.uio_segflg = UIO_SYSSPACE; 82168404Spjd auio.uio_rw = UIO_WRITE; 83168404Spjd auio.uio_offset = (off_t)-1; 84235222Smm auio.uio_td = dsp->dsa_td; 85168404Spjd#ifdef _KERNEL 86235222Smm if (dsp->dsa_fp->f_type == DTYPE_VNODE) 87168404Spjd bwillwrite(); 88235222Smm dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0, 89235222Smm dsp->dsa_td); 90168404Spjd#else 91168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 92235222Smm dsp->dsa_err = EOPNOTSUPP; 93168404Spjd#endif 94235222Smm mutex_enter(&ds->ds_sendstream_lock); 95235222Smm *dsp->dsa_off += len; 96235222Smm mutex_exit(&ds->ds_sendstream_lock); 97235222Smm 98235222Smm return (dsp->dsa_err); 99168404Spjd} 100168404Spjd 101168404Spjdstatic int 102235222Smmdump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 103168404Spjd uint64_t length) 104168404Spjd{ 105235222Smm struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); 106219089Spjd 107253821Sdelphij /* 108253821Sdelphij * When we receive a free record, dbuf_free_range() assumes 109253821Sdelphij * that the receiving system doesn't have any dbufs in the range 110253821Sdelphij * being freed. This is always true because there is a one-record 111253821Sdelphij * constraint: we only send one WRITE record for any given 112253821Sdelphij * object+offset. We know that the one-record constraint is 113253821Sdelphij * true because we always send data in increasing order by 114253821Sdelphij * object,offset. 115253821Sdelphij * 116253821Sdelphij * If the increasing-order constraint ever changes, we should find 117253821Sdelphij * another way to assert that the one-record constraint is still 118253821Sdelphij * satisfied. 119253821Sdelphij */ 120253821Sdelphij ASSERT(object > dsp->dsa_last_data_object || 121253821Sdelphij (object == dsp->dsa_last_data_object && 122253821Sdelphij offset > dsp->dsa_last_data_offset)); 123253821Sdelphij 124253821Sdelphij /* 125253821Sdelphij * If we are doing a non-incremental send, then there can't 126253821Sdelphij * be any data in the dataset we're receiving into. Therefore 127253821Sdelphij * a free record would simply be a no-op. Save space by not 128253821Sdelphij * sending it to begin with. 129253821Sdelphij */ 130253821Sdelphij if (!dsp->dsa_incremental) 131253821Sdelphij return (0); 132253821Sdelphij 133237458Smm if (length != -1ULL && offset + length < offset) 134237458Smm length = -1ULL; 135237458Smm 136219089Spjd /* 137219089Spjd * If there is a pending op, but it's not PENDING_FREE, push it out, 138219089Spjd * since free block aggregation can only be done for blocks of the 139219089Spjd * same type (i.e., DRR_FREE records can only be aggregated with 140219089Spjd * other DRR_FREE records. DRR_FREEOBJECTS records can only be 141219089Spjd * aggregated with other DRR_FREEOBJECTS records. 142219089Spjd */ 143235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 144235222Smm dsp->dsa_pending_op != PENDING_FREE) { 145235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 146235222Smm sizeof (dmu_replay_record_t)) != 0) 147249195Smm return (SET_ERROR(EINTR)); 148235222Smm dsp->dsa_pending_op = PENDING_NONE; 149219089Spjd } 150219089Spjd 151235222Smm if (dsp->dsa_pending_op == PENDING_FREE) { 152219089Spjd /* 153219089Spjd * There should never be a PENDING_FREE if length is -1 154219089Spjd * (because dump_dnode is the only place where this 155219089Spjd * function is called with a -1, and only after flushing 156219089Spjd * any pending record). 157219089Spjd */ 158219089Spjd ASSERT(length != -1ULL); 159219089Spjd /* 160219089Spjd * Check to see whether this free block can be aggregated 161219089Spjd * with pending one. 162219089Spjd */ 163219089Spjd if (drrf->drr_object == object && drrf->drr_offset + 164219089Spjd drrf->drr_length == offset) { 165219089Spjd drrf->drr_length += length; 166219089Spjd return (0); 167219089Spjd } else { 168219089Spjd /* not a continuation. Push out pending record */ 169235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 170219089Spjd sizeof (dmu_replay_record_t)) != 0) 171249195Smm return (SET_ERROR(EINTR)); 172235222Smm dsp->dsa_pending_op = PENDING_NONE; 173219089Spjd } 174219089Spjd } 175219089Spjd /* create a FREE record and make it pending */ 176235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 177235222Smm dsp->dsa_drr->drr_type = DRR_FREE; 178219089Spjd drrf->drr_object = object; 179219089Spjd drrf->drr_offset = offset; 180219089Spjd drrf->drr_length = length; 181235222Smm drrf->drr_toguid = dsp->dsa_toguid; 182219089Spjd if (length == -1ULL) { 183235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 184235222Smm sizeof (dmu_replay_record_t)) != 0) 185249195Smm return (SET_ERROR(EINTR)); 186219089Spjd } else { 187235222Smm dsp->dsa_pending_op = PENDING_FREE; 188219089Spjd } 189168404Spjd 190168404Spjd return (0); 191168404Spjd} 192168404Spjd 193168404Spjdstatic int 194268649Sdelphijdump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, 195219089Spjd uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) 196168404Spjd{ 197235222Smm struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); 198219089Spjd 199253821Sdelphij /* 200253821Sdelphij * We send data in increasing object, offset order. 201253821Sdelphij * See comment in dump_free() for details. 202253821Sdelphij */ 203253821Sdelphij ASSERT(object > dsp->dsa_last_data_object || 204253821Sdelphij (object == dsp->dsa_last_data_object && 205253821Sdelphij offset > dsp->dsa_last_data_offset)); 206253821Sdelphij dsp->dsa_last_data_object = object; 207253821Sdelphij dsp->dsa_last_data_offset = offset + blksz - 1; 208219089Spjd 209219089Spjd /* 210219089Spjd * If there is any kind of pending aggregation (currently either 211219089Spjd * a grouping of free objects or free blocks), push it out to 212219089Spjd * the stream, since aggregation can't be done across operations 213219089Spjd * of different types. 214219089Spjd */ 215235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 216235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 217235222Smm sizeof (dmu_replay_record_t)) != 0) 218249195Smm return (SET_ERROR(EINTR)); 219235222Smm dsp->dsa_pending_op = PENDING_NONE; 220219089Spjd } 221168404Spjd /* write a DATA record */ 222235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 223235222Smm dsp->dsa_drr->drr_type = DRR_WRITE; 224219089Spjd drrw->drr_object = object; 225219089Spjd drrw->drr_type = type; 226219089Spjd drrw->drr_offset = offset; 227219089Spjd drrw->drr_length = blksz; 228235222Smm drrw->drr_toguid = dsp->dsa_toguid; 229268649Sdelphij if (BP_IS_EMBEDDED(bp)) { 230268649Sdelphij /* 231268649Sdelphij * There's no pre-computed checksum of embedded BP's, so 232268649Sdelphij * (like fletcher4-checkummed blocks) userland will have 233268649Sdelphij * to compute a dedup-capable checksum itself. 234268649Sdelphij */ 235268649Sdelphij drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; 236268649Sdelphij } else { 237268649Sdelphij drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); 238268649Sdelphij if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) 239268649Sdelphij drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; 240268649Sdelphij DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); 241268649Sdelphij DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); 242268649Sdelphij DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); 243268649Sdelphij drrw->drr_key.ddk_cksum = bp->blk_cksum; 244268649Sdelphij } 245168404Spjd 246235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 247249195Smm return (SET_ERROR(EINTR)); 248235222Smm if (dump_bytes(dsp, data, blksz) != 0) 249249195Smm return (SET_ERROR(EINTR)); 250219089Spjd return (0); 251219089Spjd} 252219089Spjd 253219089Spjdstatic int 254268649Sdelphijdump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 255268649Sdelphij int blksz, const blkptr_t *bp) 256268649Sdelphij{ 257268649Sdelphij char buf[BPE_PAYLOAD_SIZE]; 258268649Sdelphij struct drr_write_embedded *drrw = 259268649Sdelphij &(dsp->dsa_drr->drr_u.drr_write_embedded); 260268649Sdelphij 261268649Sdelphij if (dsp->dsa_pending_op != PENDING_NONE) { 262268649Sdelphij if (dump_bytes(dsp, dsp->dsa_drr, 263268649Sdelphij sizeof (dmu_replay_record_t)) != 0) 264268649Sdelphij return (EINTR); 265268649Sdelphij dsp->dsa_pending_op = PENDING_NONE; 266268649Sdelphij } 267268649Sdelphij 268268649Sdelphij ASSERT(BP_IS_EMBEDDED(bp)); 269268649Sdelphij 270268649Sdelphij bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 271268649Sdelphij dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED; 272268649Sdelphij drrw->drr_object = object; 273268649Sdelphij drrw->drr_offset = offset; 274268649Sdelphij drrw->drr_length = blksz; 275268649Sdelphij drrw->drr_toguid = dsp->dsa_toguid; 276268649Sdelphij drrw->drr_compression = BP_GET_COMPRESS(bp); 277268649Sdelphij drrw->drr_etype = BPE_GET_ETYPE(bp); 278268649Sdelphij drrw->drr_lsize = BPE_GET_LSIZE(bp); 279268649Sdelphij drrw->drr_psize = BPE_GET_PSIZE(bp); 280268649Sdelphij 281268649Sdelphij decode_embedded_bp_compressed(bp, buf); 282268649Sdelphij 283268649Sdelphij if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 284268649Sdelphij return (EINTR); 285268649Sdelphij if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) 286268649Sdelphij return (EINTR); 287268649Sdelphij return (0); 288268649Sdelphij} 289268649Sdelphij 290268649Sdelphijstatic int 291235222Smmdump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) 292219089Spjd{ 293235222Smm struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); 294219089Spjd 295235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 296235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 297235222Smm sizeof (dmu_replay_record_t)) != 0) 298249195Smm return (SET_ERROR(EINTR)); 299235222Smm dsp->dsa_pending_op = PENDING_NONE; 300219089Spjd } 301219089Spjd 302219089Spjd /* write a SPILL record */ 303235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 304235222Smm dsp->dsa_drr->drr_type = DRR_SPILL; 305219089Spjd drrs->drr_object = object; 306219089Spjd drrs->drr_length = blksz; 307235222Smm drrs->drr_toguid = dsp->dsa_toguid; 308219089Spjd 309235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) 310249195Smm return (SET_ERROR(EINTR)); 311235222Smm if (dump_bytes(dsp, data, blksz)) 312249195Smm return (SET_ERROR(EINTR)); 313168404Spjd return (0); 314168404Spjd} 315168404Spjd 316168404Spjdstatic int 317235222Smmdump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) 318168404Spjd{ 319235222Smm struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); 320219089Spjd 321253821Sdelphij /* See comment in dump_free(). */ 322253821Sdelphij if (!dsp->dsa_incremental) 323253821Sdelphij return (0); 324253821Sdelphij 325219089Spjd /* 326219089Spjd * If there is a pending op, but it's not PENDING_FREEOBJECTS, 327219089Spjd * push it out, since free block aggregation can only be done for 328219089Spjd * blocks of the same type (i.e., DRR_FREE records can only be 329219089Spjd * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records 330219089Spjd * can only be aggregated with other DRR_FREEOBJECTS records. 331219089Spjd */ 332235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 333235222Smm dsp->dsa_pending_op != PENDING_FREEOBJECTS) { 334235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 335235222Smm sizeof (dmu_replay_record_t)) != 0) 336249195Smm return (SET_ERROR(EINTR)); 337235222Smm dsp->dsa_pending_op = PENDING_NONE; 338219089Spjd } 339235222Smm if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { 340219089Spjd /* 341219089Spjd * See whether this free object array can be aggregated 342219089Spjd * with pending one 343219089Spjd */ 344219089Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { 345219089Spjd drrfo->drr_numobjs += numobjs; 346219089Spjd return (0); 347219089Spjd } else { 348219089Spjd /* can't be aggregated. Push out pending record */ 349235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 350219089Spjd sizeof (dmu_replay_record_t)) != 0) 351249195Smm return (SET_ERROR(EINTR)); 352235222Smm dsp->dsa_pending_op = PENDING_NONE; 353219089Spjd } 354219089Spjd } 355219089Spjd 356168404Spjd /* write a FREEOBJECTS record */ 357235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 358235222Smm dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; 359219089Spjd drrfo->drr_firstobj = firstobj; 360219089Spjd drrfo->drr_numobjs = numobjs; 361235222Smm drrfo->drr_toguid = dsp->dsa_toguid; 362168404Spjd 363235222Smm dsp->dsa_pending_op = PENDING_FREEOBJECTS; 364219089Spjd 365168404Spjd return (0); 366168404Spjd} 367168404Spjd 368168404Spjdstatic int 369235222Smmdump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) 370168404Spjd{ 371235222Smm struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); 372219089Spjd 373168404Spjd if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 374235222Smm return (dump_freeobjects(dsp, object, 1)); 375168404Spjd 376235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 377235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 378235222Smm sizeof (dmu_replay_record_t)) != 0) 379249195Smm return (SET_ERROR(EINTR)); 380235222Smm dsp->dsa_pending_op = PENDING_NONE; 381219089Spjd } 382219089Spjd 383168404Spjd /* write an OBJECT record */ 384235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 385235222Smm dsp->dsa_drr->drr_type = DRR_OBJECT; 386219089Spjd drro->drr_object = object; 387219089Spjd drro->drr_type = dnp->dn_type; 388219089Spjd drro->drr_bonustype = dnp->dn_bonustype; 389219089Spjd drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 390219089Spjd drro->drr_bonuslen = dnp->dn_bonuslen; 391219089Spjd drro->drr_checksumtype = dnp->dn_checksum; 392219089Spjd drro->drr_compress = dnp->dn_compress; 393235222Smm drro->drr_toguid = dsp->dsa_toguid; 394168404Spjd 395235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 396249195Smm return (SET_ERROR(EINTR)); 397168404Spjd 398235222Smm if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) 399249195Smm return (SET_ERROR(EINTR)); 400168404Spjd 401253821Sdelphij /* Free anything past the end of the file. */ 402235222Smm if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * 403253821Sdelphij (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) 404249195Smm return (SET_ERROR(EINTR)); 405248571Smm if (dsp->dsa_err != 0) 406249195Smm return (SET_ERROR(EINTR)); 407168404Spjd return (0); 408168404Spjd} 409168404Spjd 410268649Sdelphijstatic boolean_t 411268649Sdelphijbackup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp) 412268649Sdelphij{ 413268649Sdelphij if (!BP_IS_EMBEDDED(bp)) 414268649Sdelphij return (B_FALSE); 415268649Sdelphij 416268649Sdelphij /* 417268649Sdelphij * Compression function must be legacy, or explicitly enabled. 418268649Sdelphij */ 419268649Sdelphij if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS && 420268649Sdelphij !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4))) 421268649Sdelphij return (B_FALSE); 422268649Sdelphij 423268649Sdelphij /* 424268649Sdelphij * Embed type must be explicitly enabled. 425268649Sdelphij */ 426268649Sdelphij switch (BPE_GET_ETYPE(bp)) { 427268649Sdelphij case BP_EMBEDDED_TYPE_DATA: 428268649Sdelphij if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) 429268649Sdelphij return (B_TRUE); 430268649Sdelphij break; 431268649Sdelphij default: 432268649Sdelphij return (B_FALSE); 433268649Sdelphij } 434268649Sdelphij return (B_FALSE); 435268649Sdelphij} 436268649Sdelphij 437168404Spjd#define BP_SPAN(dnp, level) \ 438168404Spjd (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 439168404Spjd (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 440168404Spjd 441219089Spjd/* ARGSUSED */ 442168404Spjdstatic int 443246666Smmbackup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 444268657Sdelphij const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 445168404Spjd{ 446235222Smm dmu_sendarg_t *dsp = arg; 447168404Spjd dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 448168404Spjd int err = 0; 449168404Spjd 450185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) 451249195Smm return (SET_ERROR(EINTR)); 452168404Spjd 453219089Spjd if (zb->zb_object != DMU_META_DNODE_OBJECT && 454219089Spjd DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { 455209962Smm return (0); 456263407Sdelphij } else if (zb->zb_level == ZB_ZIL_LEVEL) { 457263407Sdelphij /* 458263407Sdelphij * If we are sending a non-snapshot (which is allowed on 459263407Sdelphij * read-only pools), it may have a ZIL, which must be ignored. 460263407Sdelphij */ 461263407Sdelphij return (0); 462263397Sdelphij } else if (BP_IS_HOLE(bp) && 463263397Sdelphij zb->zb_object == DMU_META_DNODE_OBJECT) { 464208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 465208047Smm uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; 466235222Smm err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); 467263397Sdelphij } else if (BP_IS_HOLE(bp)) { 468208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 469235222Smm err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); 470208047Smm } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { 471208047Smm return (0); 472208047Smm } else if (type == DMU_OT_DNODE) { 473208047Smm dnode_phys_t *blk; 474168404Spjd int i; 475168404Spjd int blksz = BP_GET_LSIZE(bp); 476208047Smm uint32_t aflags = ARC_WAIT; 477208047Smm arc_buf_t *abuf; 478168404Spjd 479246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 480246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 481246666Smm &aflags, zb) != 0) 482249195Smm return (SET_ERROR(EIO)); 483208047Smm 484208047Smm blk = abuf->b_data; 485168404Spjd for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 486208047Smm uint64_t dnobj = (zb->zb_blkid << 487208047Smm (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 488235222Smm err = dump_dnode(dsp, dnobj, blk+i); 489248571Smm if (err != 0) 490168404Spjd break; 491168404Spjd } 492208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 493219089Spjd } else if (type == DMU_OT_SA) { 494208047Smm uint32_t aflags = ARC_WAIT; 495208047Smm arc_buf_t *abuf; 496168404Spjd int blksz = BP_GET_LSIZE(bp); 497168404Spjd 498246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 499246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 500246666Smm &aflags, zb) != 0) 501249195Smm return (SET_ERROR(EIO)); 502168404Spjd 503235222Smm err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); 504219089Spjd (void) arc_buf_remove_ref(abuf, &abuf); 505268649Sdelphij } else if (backup_do_embed(dsp, bp)) { 506268649Sdelphij /* it's an embedded level-0 block of a regular object */ 507268649Sdelphij int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 508268649Sdelphij err = dump_write_embedded(dsp, zb->zb_object, 509268649Sdelphij zb->zb_blkid * blksz, blksz, bp); 510219089Spjd } else { /* it's a level-0 block of a regular object */ 511219089Spjd uint32_t aflags = ARC_WAIT; 512219089Spjd arc_buf_t *abuf; 513219089Spjd int blksz = BP_GET_LSIZE(bp); 514219089Spjd 515268649Sdelphij ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 516263407Sdelphij ASSERT0(zb->zb_level); 517246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 518246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 519246666Smm &aflags, zb) != 0) { 520228103Smm if (zfs_send_corrupt_data) { 521228103Smm /* Send a block filled with 0x"zfs badd bloc" */ 522228103Smm abuf = arc_buf_alloc(spa, blksz, &abuf, 523228103Smm ARC_BUFC_DATA); 524228103Smm uint64_t *ptr; 525228103Smm for (ptr = abuf->b_data; 526228103Smm (char *)ptr < (char *)abuf->b_data + blksz; 527228103Smm ptr++) 528228103Smm *ptr = 0x2f5baddb10c; 529228103Smm } else { 530249195Smm return (SET_ERROR(EIO)); 531228103Smm } 532228103Smm } 533219089Spjd 534268649Sdelphij err = dump_write(dsp, type, zb->zb_object, zb->zb_blkid * blksz, 535219089Spjd blksz, bp, abuf->b_data); 536208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 537168404Spjd } 538168404Spjd 539168404Spjd ASSERT(err == 0 || err == EINTR); 540168404Spjd return (err); 541168404Spjd} 542168404Spjd 543248571Smm/* 544263407Sdelphij * Releases dp using the specified tag. 545248571Smm */ 546248571Smmstatic int 547248571Smmdmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, 548268649Sdelphij zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok, 549248571Smm#ifdef illumos 550268649Sdelphij int outfd, vnode_t *vp, offset_t *off) 551248571Smm#else 552268649Sdelphij int outfd, struct file *fp, offset_t *off) 553248571Smm#endif 554168404Spjd{ 555248571Smm objset_t *os; 556168404Spjd dmu_replay_record_t *drr; 557235222Smm dmu_sendarg_t *dsp; 558168404Spjd int err; 559185029Spjd uint64_t fromtxg = 0; 560268649Sdelphij uint64_t featureflags = 0; 561168404Spjd 562248571Smm err = dmu_objset_from_ds(ds, &os); 563248571Smm if (err != 0) { 564248571Smm dsl_pool_rele(dp, tag); 565248571Smm return (err); 566185029Spjd } 567185029Spjd 568168404Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 569168404Spjd drr->drr_type = DRR_BEGIN; 570168404Spjd drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 571219089Spjd DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 572219089Spjd DMU_SUBSTREAM); 573219089Spjd 574219089Spjd#ifdef _KERNEL 575248571Smm if (dmu_objset_type(os) == DMU_OST_ZFS) { 576219089Spjd uint64_t version; 577248571Smm if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { 578235222Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 579248571Smm dsl_pool_rele(dp, tag); 580249195Smm return (SET_ERROR(EINVAL)); 581235222Smm } 582248571Smm if (version >= ZPL_VERSION_SA) { 583268649Sdelphij featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; 584219089Spjd } 585219089Spjd } 586219089Spjd#endif 587219089Spjd 588268649Sdelphij if (embedok && 589268649Sdelphij spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { 590268649Sdelphij featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA; 591268649Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) 592268649Sdelphij featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4; 593268649Sdelphij } else { 594268649Sdelphij embedok = B_FALSE; 595268649Sdelphij } 596268649Sdelphij 597268649Sdelphij DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo, 598268649Sdelphij featureflags); 599268649Sdelphij 600168404Spjd drr->drr_u.drr_begin.drr_creation_time = 601168404Spjd ds->ds_phys->ds_creation_time; 602248571Smm drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); 603263407Sdelphij if (is_clone) 604185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 605168404Spjd drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 606185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 607185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 608185029Spjd 609263407Sdelphij if (fromzb != NULL) { 610263407Sdelphij drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid; 611263407Sdelphij fromtxg = fromzb->zbm_creation_txg; 612263407Sdelphij } 613168404Spjd dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 614263407Sdelphij if (!dsl_dataset_is_snapshot(ds)) { 615263407Sdelphij (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--", 616263407Sdelphij sizeof (drr->drr_u.drr_begin.drr_toname)); 617248571Smm } 618185029Spjd 619235222Smm dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); 620168404Spjd 621235222Smm dsp->dsa_drr = drr; 622235222Smm dsp->dsa_outfd = outfd; 623235222Smm dsp->dsa_proc = curproc; 624235222Smm dsp->dsa_td = curthread; 625235222Smm dsp->dsa_fp = fp; 626248571Smm dsp->dsa_os = os; 627235222Smm dsp->dsa_off = off; 628235222Smm dsp->dsa_toguid = ds->ds_phys->ds_guid; 629235222Smm ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); 630235222Smm dsp->dsa_pending_op = PENDING_NONE; 631263407Sdelphij dsp->dsa_incremental = (fromzb != NULL); 632268649Sdelphij dsp->dsa_featureflags = featureflags; 633235222Smm 634235222Smm mutex_enter(&ds->ds_sendstream_lock); 635235222Smm list_insert_head(&ds->ds_sendstreams, dsp); 636235222Smm mutex_exit(&ds->ds_sendstream_lock); 637235222Smm 638249042Smm dsl_dataset_long_hold(ds, FTAG); 639249042Smm dsl_pool_rele(dp, tag); 640249042Smm 641235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 642235222Smm err = dsp->dsa_err; 643235222Smm goto out; 644168404Spjd } 645168404Spjd 646208047Smm err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 647235222Smm backup_cb, dsp); 648168404Spjd 649235222Smm if (dsp->dsa_pending_op != PENDING_NONE) 650235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) 651249195Smm err = SET_ERROR(EINTR); 652219089Spjd 653248571Smm if (err != 0) { 654248571Smm if (err == EINTR && dsp->dsa_err != 0) 655235222Smm err = dsp->dsa_err; 656235222Smm goto out; 657168404Spjd } 658168404Spjd 659168404Spjd bzero(drr, sizeof (dmu_replay_record_t)); 660168404Spjd drr->drr_type = DRR_END; 661235222Smm drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; 662235222Smm drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; 663168404Spjd 664235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 665235222Smm err = dsp->dsa_err; 666235222Smm goto out; 667168404Spjd } 668168404Spjd 669235222Smmout: 670235222Smm mutex_enter(&ds->ds_sendstream_lock); 671235222Smm list_remove(&ds->ds_sendstreams, dsp); 672235222Smm mutex_exit(&ds->ds_sendstream_lock); 673235222Smm 674168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 675235222Smm kmem_free(dsp, sizeof (dmu_sendarg_t)); 676168404Spjd 677248571Smm dsl_dataset_long_rele(ds, FTAG); 678248571Smm 679235222Smm return (err); 680168404Spjd} 681168404Spjd 682228103Smmint 683248571Smmdmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, 684248571Smm#ifdef illumos 685268649Sdelphij boolean_t embedok, int outfd, vnode_t *vp, offset_t *off) 686248571Smm#else 687268649Sdelphij boolean_t embedok, int outfd, struct file *fp, offset_t *off) 688248571Smm#endif 689228103Smm{ 690248571Smm dsl_pool_t *dp; 691248571Smm dsl_dataset_t *ds; 692248571Smm dsl_dataset_t *fromds = NULL; 693248571Smm int err; 694248571Smm 695248571Smm err = dsl_pool_hold(pool, FTAG, &dp); 696248571Smm if (err != 0) 697248571Smm return (err); 698248571Smm 699248571Smm err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); 700248571Smm if (err != 0) { 701248571Smm dsl_pool_rele(dp, FTAG); 702248571Smm return (err); 703248571Smm } 704248571Smm 705248571Smm if (fromsnap != 0) { 706263407Sdelphij zfs_bookmark_phys_t zb; 707263407Sdelphij boolean_t is_clone; 708263407Sdelphij 709248571Smm err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); 710248571Smm if (err != 0) { 711248571Smm dsl_dataset_rele(ds, FTAG); 712248571Smm dsl_pool_rele(dp, FTAG); 713248571Smm return (err); 714248571Smm } 715263407Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 716263407Sdelphij err = SET_ERROR(EXDEV); 717263407Sdelphij zb.zbm_creation_time = fromds->ds_phys->ds_creation_time; 718263407Sdelphij zb.zbm_creation_txg = fromds->ds_phys->ds_creation_txg; 719263407Sdelphij zb.zbm_guid = fromds->ds_phys->ds_guid; 720263407Sdelphij is_clone = (fromds->ds_dir != ds->ds_dir); 721263407Sdelphij dsl_dataset_rele(fromds, FTAG); 722268649Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, 723263407Sdelphij outfd, fp, off); 724263407Sdelphij } else { 725268649Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, 726263407Sdelphij outfd, fp, off); 727248571Smm } 728263407Sdelphij dsl_dataset_rele(ds, FTAG); 729263407Sdelphij return (err); 730248571Smm} 731248571Smm 732248571Smmint 733268649Sdelphijdmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, 734248571Smm#ifdef illumos 735248571Smm int outfd, vnode_t *vp, offset_t *off) 736248571Smm#else 737248571Smm int outfd, struct file *fp, offset_t *off) 738248571Smm#endif 739248571Smm{ 740248571Smm dsl_pool_t *dp; 741248571Smm dsl_dataset_t *ds; 742248571Smm int err; 743263407Sdelphij boolean_t owned = B_FALSE; 744248571Smm 745263407Sdelphij if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) 746249195Smm return (SET_ERROR(EINVAL)); 747248571Smm 748248571Smm err = dsl_pool_hold(tosnap, FTAG, &dp); 749248571Smm if (err != 0) 750248571Smm return (err); 751248571Smm 752263407Sdelphij if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) { 753263407Sdelphij /* 754263407Sdelphij * We are sending a filesystem or volume. Ensure 755263407Sdelphij * that it doesn't change by owning the dataset. 756263407Sdelphij */ 757263407Sdelphij err = dsl_dataset_own(dp, tosnap, FTAG, &ds); 758263407Sdelphij owned = B_TRUE; 759263407Sdelphij } else { 760263407Sdelphij err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); 761263407Sdelphij } 762248571Smm if (err != 0) { 763248571Smm dsl_pool_rele(dp, FTAG); 764248571Smm return (err); 765248571Smm } 766248571Smm 767248571Smm if (fromsnap != NULL) { 768263407Sdelphij zfs_bookmark_phys_t zb; 769263407Sdelphij boolean_t is_clone = B_FALSE; 770263407Sdelphij int fsnamelen = strchr(tosnap, '@') - tosnap; 771263407Sdelphij 772263407Sdelphij /* 773263407Sdelphij * If the fromsnap is in a different filesystem, then 774263407Sdelphij * mark the send stream as a clone. 775263407Sdelphij */ 776263407Sdelphij if (strncmp(tosnap, fromsnap, fsnamelen) != 0 || 777263407Sdelphij (fromsnap[fsnamelen] != '@' && 778263407Sdelphij fromsnap[fsnamelen] != '#')) { 779263407Sdelphij is_clone = B_TRUE; 780263407Sdelphij } 781263407Sdelphij 782263407Sdelphij if (strchr(fromsnap, '@')) { 783263407Sdelphij dsl_dataset_t *fromds; 784263407Sdelphij err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); 785263407Sdelphij if (err == 0) { 786263407Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 787263407Sdelphij err = SET_ERROR(EXDEV); 788263407Sdelphij zb.zbm_creation_time = 789263407Sdelphij fromds->ds_phys->ds_creation_time; 790263407Sdelphij zb.zbm_creation_txg = 791263407Sdelphij fromds->ds_phys->ds_creation_txg; 792263407Sdelphij zb.zbm_guid = fromds->ds_phys->ds_guid; 793263407Sdelphij is_clone = (ds->ds_dir != fromds->ds_dir); 794263407Sdelphij dsl_dataset_rele(fromds, FTAG); 795263407Sdelphij } 796263407Sdelphij } else { 797263407Sdelphij err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); 798263407Sdelphij } 799248571Smm if (err != 0) { 800248571Smm dsl_dataset_rele(ds, FTAG); 801248571Smm dsl_pool_rele(dp, FTAG); 802248571Smm return (err); 803248571Smm } 804268649Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, 805263407Sdelphij outfd, fp, off); 806263407Sdelphij } else { 807268649Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, 808263407Sdelphij outfd, fp, off); 809248571Smm } 810263407Sdelphij if (owned) 811263407Sdelphij dsl_dataset_disown(ds, FTAG); 812263407Sdelphij else 813263407Sdelphij dsl_dataset_rele(ds, FTAG); 814263407Sdelphij return (err); 815248571Smm} 816248571Smm 817248571Smmint 818248571Smmdmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) 819248571Smm{ 820228103Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 821228103Smm int err; 822228103Smm uint64_t size; 823228103Smm 824248571Smm ASSERT(dsl_pool_config_held(dp)); 825248571Smm 826228103Smm /* tosnap must be a snapshot */ 827248571Smm if (!dsl_dataset_is_snapshot(ds)) 828249195Smm return (SET_ERROR(EINVAL)); 829228103Smm 830248571Smm /* 831248571Smm * fromsnap must be an earlier snapshot from the same fs as tosnap, 832248571Smm * or the origin's fs. 833248571Smm */ 834263407Sdelphij if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0)) 835249195Smm return (SET_ERROR(EXDEV)); 836228103Smm 837228103Smm /* Get uncompressed size estimate of changed data. */ 838228103Smm if (fromds == NULL) { 839228103Smm size = ds->ds_phys->ds_uncompressed_bytes; 840228103Smm } else { 841228103Smm uint64_t used, comp; 842228103Smm err = dsl_dataset_space_written(fromds, ds, 843228103Smm &used, &comp, &size); 844248571Smm if (err != 0) 845228103Smm return (err); 846228103Smm } 847228103Smm 848228103Smm /* 849228103Smm * Assume that space (both on-disk and in-stream) is dominated by 850228103Smm * data. We will adjust for indirect blocks and the copies property, 851228103Smm * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). 852228103Smm */ 853228103Smm 854228103Smm /* 855228103Smm * Subtract out approximate space used by indirect blocks. 856228103Smm * Assume most space is used by data blocks (non-indirect, non-dnode). 857228103Smm * Assume all blocks are recordsize. Assume ditto blocks and 858228103Smm * internal fragmentation counter out compression. 859228103Smm * 860228103Smm * Therefore, space used by indirect blocks is sizeof(blkptr_t) per 861228103Smm * block, which we observe in practice. 862228103Smm */ 863228103Smm uint64_t recordsize; 864248571Smm err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); 865248571Smm if (err != 0) 866228103Smm return (err); 867228103Smm size -= size / recordsize * sizeof (blkptr_t); 868228103Smm 869228103Smm /* Add in the space for the record associated with each block. */ 870228103Smm size += size / recordsize * sizeof (dmu_replay_record_t); 871228103Smm 872228103Smm *sizep = size; 873228103Smm 874228103Smm return (0); 875228103Smm} 876228103Smm 877248571Smmtypedef struct dmu_recv_begin_arg { 878248571Smm const char *drba_origin; 879248571Smm dmu_recv_cookie_t *drba_cookie; 880248571Smm cred_t *drba_cred; 881253820Sdelphij uint64_t drba_snapobj; 882248571Smm} dmu_recv_begin_arg_t; 883168404Spjd 884168404Spjdstatic int 885248571Smmrecv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, 886248571Smm uint64_t fromguid) 887168404Spjd{ 888185029Spjd uint64_t val; 889248571Smm int error; 890248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 891185029Spjd 892248571Smm /* temporary clone name must not exist */ 893248571Smm error = zap_lookup(dp->dp_meta_objset, 894248571Smm ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, 895248571Smm 8, 1, &val); 896248571Smm if (error != ENOENT) 897248571Smm return (error == 0 ? EBUSY : error); 898248571Smm 899219089Spjd /* new snapshot name must not exist */ 900248571Smm error = zap_lookup(dp->dp_meta_objset, 901248571Smm ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 902248571Smm 8, 1, &val); 903248571Smm if (error != ENOENT) 904248571Smm return (error == 0 ? EEXIST : error); 905168404Spjd 906265744Sdelphij /* 907265744Sdelphij * Check snapshot limit before receiving. We'll recheck again at the 908265744Sdelphij * end, but might as well abort before receiving if we're already over 909265744Sdelphij * the limit. 910265744Sdelphij * 911265744Sdelphij * Note that we do not check the file system limit with 912265744Sdelphij * dsl_dir_fscount_check because the temporary %clones don't count 913265744Sdelphij * against that limit. 914265744Sdelphij */ 915265744Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, 916265744Sdelphij NULL, drba->drba_cred); 917265744Sdelphij if (error != 0) 918265744Sdelphij return (error); 919265744Sdelphij 920248571Smm if (fromguid != 0) { 921253820Sdelphij dsl_dataset_t *snap; 922253820Sdelphij uint64_t obj = ds->ds_phys->ds_prev_snap_obj; 923253820Sdelphij 924253820Sdelphij /* Find snapshot in this dir that matches fromguid. */ 925253820Sdelphij while (obj != 0) { 926253820Sdelphij error = dsl_dataset_hold_obj(dp, obj, FTAG, 927253820Sdelphij &snap); 928253820Sdelphij if (error != 0) 929253820Sdelphij return (SET_ERROR(ENODEV)); 930253820Sdelphij if (snap->ds_dir != ds->ds_dir) { 931253820Sdelphij dsl_dataset_rele(snap, FTAG); 932253820Sdelphij return (SET_ERROR(ENODEV)); 933253820Sdelphij } 934253820Sdelphij if (snap->ds_phys->ds_guid == fromguid) 935253820Sdelphij break; 936253820Sdelphij obj = snap->ds_phys->ds_prev_snap_obj; 937253820Sdelphij dsl_dataset_rele(snap, FTAG); 938253820Sdelphij } 939253820Sdelphij if (obj == 0) 940249195Smm return (SET_ERROR(ENODEV)); 941168404Spjd 942253820Sdelphij if (drba->drba_cookie->drc_force) { 943253820Sdelphij drba->drba_snapobj = obj; 944253820Sdelphij } else { 945253820Sdelphij /* 946253820Sdelphij * If we are not forcing, there must be no 947253820Sdelphij * changes since fromsnap. 948253820Sdelphij */ 949253820Sdelphij if (dsl_dataset_modified_since_snap(ds, snap)) { 950219089Spjd dsl_dataset_rele(snap, FTAG); 951253820Sdelphij return (SET_ERROR(ETXTBSY)); 952219089Spjd } 953253820Sdelphij drba->drba_snapobj = ds->ds_prev->ds_object; 954219089Spjd } 955253820Sdelphij 956253820Sdelphij dsl_dataset_rele(snap, FTAG); 957219089Spjd } else { 958219089Spjd /* if full, most recent snapshot must be $ORIGIN */ 959219089Spjd if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) 960249195Smm return (SET_ERROR(ENODEV)); 961253820Sdelphij drba->drba_snapobj = ds->ds_phys->ds_prev_snap_obj; 962219089Spjd } 963219089Spjd 964248571Smm return (0); 965168404Spjd 966168404Spjd} 967168404Spjd 968248571Smmstatic int 969248571Smmdmu_recv_begin_check(void *arg, dmu_tx_t *tx) 970248571Smm{ 971248571Smm dmu_recv_begin_arg_t *drba = arg; 972248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 973248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 974248571Smm uint64_t fromguid = drrb->drr_fromguid; 975248571Smm int flags = drrb->drr_flags; 976248571Smm int error; 977268649Sdelphij uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); 978248571Smm dsl_dataset_t *ds; 979248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 980248571Smm 981248571Smm /* already checked */ 982248571Smm ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 983248571Smm 984248571Smm if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == 985248571Smm DMU_COMPOUNDSTREAM || 986248571Smm drrb->drr_type >= DMU_OST_NUMTYPES || 987248571Smm ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) 988249195Smm return (SET_ERROR(EINVAL)); 989248571Smm 990248571Smm /* Verify pool version supports SA if SA_SPILL feature set */ 991268649Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && 992268649Sdelphij spa_version(dp->dp_spa) < SPA_VERSION_SA) 993249195Smm return (SET_ERROR(ENOTSUP)); 994248571Smm 995268649Sdelphij /* 996268649Sdelphij * The receiving code doesn't know how to translate a WRITE_EMBEDDED 997268649Sdelphij * record to a plan WRITE record, so the pool must have the 998268649Sdelphij * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED 999268649Sdelphij * records. Same with WRITE_EMBEDDED records that use LZ4 compression. 1000268649Sdelphij */ 1001268649Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) && 1002268649Sdelphij !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) 1003268649Sdelphij return (SET_ERROR(ENOTSUP)); 1004268649Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) && 1005268649Sdelphij !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) 1006268649Sdelphij return (SET_ERROR(ENOTSUP)); 1007268649Sdelphij 1008248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 1009248571Smm if (error == 0) { 1010248571Smm /* target fs already exists; recv into temp clone */ 1011248571Smm 1012248571Smm /* Can't recv a clone into an existing fs */ 1013248571Smm if (flags & DRR_FLAG_CLONE) { 1014248571Smm dsl_dataset_rele(ds, FTAG); 1015249195Smm return (SET_ERROR(EINVAL)); 1016248571Smm } 1017248571Smm 1018248571Smm error = recv_begin_check_existing_impl(drba, ds, fromguid); 1019248571Smm dsl_dataset_rele(ds, FTAG); 1020248571Smm } else if (error == ENOENT) { 1021248571Smm /* target fs does not exist; must be a full backup or clone */ 1022248571Smm char buf[MAXNAMELEN]; 1023248571Smm 1024248571Smm /* 1025248571Smm * If it's a non-clone incremental, we are missing the 1026248571Smm * target fs, so fail the recv. 1027248571Smm */ 1028248571Smm if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) 1029249195Smm return (SET_ERROR(ENOENT)); 1030248571Smm 1031248571Smm /* Open the parent of tofs */ 1032248571Smm ASSERT3U(strlen(tofs), <, MAXNAMELEN); 1033248571Smm (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); 1034248571Smm error = dsl_dataset_hold(dp, buf, FTAG, &ds); 1035248571Smm if (error != 0) 1036248571Smm return (error); 1037248571Smm 1038265744Sdelphij /* 1039265744Sdelphij * Check filesystem and snapshot limits before receiving. We'll 1040265744Sdelphij * recheck snapshot limits again at the end (we create the 1041265744Sdelphij * filesystems and increment those counts during begin_sync). 1042265744Sdelphij */ 1043265744Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 1044265744Sdelphij ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); 1045265744Sdelphij if (error != 0) { 1046265744Sdelphij dsl_dataset_rele(ds, FTAG); 1047265744Sdelphij return (error); 1048265744Sdelphij } 1049265744Sdelphij 1050265744Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 1051265744Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); 1052265744Sdelphij if (error != 0) { 1053265744Sdelphij dsl_dataset_rele(ds, FTAG); 1054265744Sdelphij return (error); 1055265744Sdelphij } 1056265744Sdelphij 1057248571Smm if (drba->drba_origin != NULL) { 1058248571Smm dsl_dataset_t *origin; 1059248571Smm error = dsl_dataset_hold(dp, drba->drba_origin, 1060248571Smm FTAG, &origin); 1061248571Smm if (error != 0) { 1062248571Smm dsl_dataset_rele(ds, FTAG); 1063248571Smm return (error); 1064248571Smm } 1065248571Smm if (!dsl_dataset_is_snapshot(origin)) { 1066248571Smm dsl_dataset_rele(origin, FTAG); 1067248571Smm dsl_dataset_rele(ds, FTAG); 1068249195Smm return (SET_ERROR(EINVAL)); 1069248571Smm } 1070248571Smm if (origin->ds_phys->ds_guid != fromguid) { 1071248571Smm dsl_dataset_rele(origin, FTAG); 1072248571Smm dsl_dataset_rele(ds, FTAG); 1073249195Smm return (SET_ERROR(ENODEV)); 1074248571Smm } 1075248571Smm dsl_dataset_rele(origin, FTAG); 1076248571Smm } 1077248571Smm dsl_dataset_rele(ds, FTAG); 1078248571Smm error = 0; 1079248571Smm } 1080248571Smm return (error); 1081248571Smm} 1082248571Smm 1083168404Spjdstatic void 1084248571Smmdmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 1085168404Spjd{ 1086248571Smm dmu_recv_begin_arg_t *drba = arg; 1087248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1088248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 1089248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 1090248571Smm dsl_dataset_t *ds, *newds; 1091185029Spjd uint64_t dsobj; 1092248571Smm int error; 1093248571Smm uint64_t crflags; 1094168404Spjd 1095248571Smm crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? 1096248571Smm DS_FLAG_CI_DATASET : 0; 1097168404Spjd 1098248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 1099248571Smm if (error == 0) { 1100248571Smm /* create temporary clone */ 1101253820Sdelphij dsl_dataset_t *snap = NULL; 1102253820Sdelphij if (drba->drba_snapobj != 0) { 1103253820Sdelphij VERIFY0(dsl_dataset_hold_obj(dp, 1104253820Sdelphij drba->drba_snapobj, FTAG, &snap)); 1105253820Sdelphij } 1106248571Smm dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, 1107253820Sdelphij snap, crflags, drba->drba_cred, tx); 1108253820Sdelphij dsl_dataset_rele(snap, FTAG); 1109248571Smm dsl_dataset_rele(ds, FTAG); 1110248571Smm } else { 1111248571Smm dsl_dir_t *dd; 1112248571Smm const char *tail; 1113248571Smm dsl_dataset_t *origin = NULL; 1114248571Smm 1115248571Smm VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); 1116248571Smm 1117248571Smm if (drba->drba_origin != NULL) { 1118248571Smm VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, 1119248571Smm FTAG, &origin)); 1120248571Smm } 1121248571Smm 1122248571Smm /* Create new dataset. */ 1123248571Smm dsobj = dsl_dataset_create_sync(dd, 1124248571Smm strrchr(tofs, '/') + 1, 1125248571Smm origin, crflags, drba->drba_cred, tx); 1126248571Smm if (origin != NULL) 1127248571Smm dsl_dataset_rele(origin, FTAG); 1128248571Smm dsl_dir_rele(dd, FTAG); 1129248571Smm drba->drba_cookie->drc_newfs = B_TRUE; 1130248571Smm } 1131248571Smm VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); 1132248571Smm 1133248571Smm dmu_buf_will_dirty(newds->ds_dbuf, tx); 1134248571Smm newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1135248571Smm 1136219089Spjd /* 1137219089Spjd * If we actually created a non-clone, we need to create the 1138219089Spjd * objset in our new dataset. 1139219089Spjd */ 1140248571Smm if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { 1141219089Spjd (void) dmu_objset_create_impl(dp->dp_spa, 1142248571Smm newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); 1143219089Spjd } 1144168404Spjd 1145248571Smm drba->drba_cookie->drc_ds = newds; 1146185029Spjd 1147248571Smm spa_history_log_internal_ds(newds, "receive", tx, ""); 1148168404Spjd} 1149168404Spjd 1150185029Spjd/* 1151185029Spjd * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 1152185029Spjd * succeeds; otherwise we will leak the holds on the datasets. 1153185029Spjd */ 1154185029Spjdint 1155248571Smmdmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, 1156248571Smm boolean_t force, char *origin, dmu_recv_cookie_t *drc) 1157168404Spjd{ 1158248571Smm dmu_recv_begin_arg_t drba = { 0 }; 1159248571Smm dmu_replay_record_t *drr; 1160168404Spjd 1161185029Spjd bzero(drc, sizeof (dmu_recv_cookie_t)); 1162185029Spjd drc->drc_drrb = drrb; 1163185029Spjd drc->drc_tosnap = tosnap; 1164248571Smm drc->drc_tofs = tofs; 1165185029Spjd drc->drc_force = force; 1166265744Sdelphij drc->drc_cred = CRED(); 1167168404Spjd 1168248571Smm if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 1169248571Smm drc->drc_byteswap = B_TRUE; 1170248571Smm else if (drrb->drr_magic != DMU_BACKUP_MAGIC) 1171249195Smm return (SET_ERROR(EINVAL)); 1172168404Spjd 1173248571Smm drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 1174248571Smm drr->drr_type = DRR_BEGIN; 1175248571Smm drr->drr_u.drr_begin = *drc->drc_drrb; 1176248571Smm if (drc->drc_byteswap) { 1177248571Smm fletcher_4_incremental_byteswap(drr, 1178248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1179248571Smm } else { 1180248571Smm fletcher_4_incremental_native(drr, 1181248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1182248571Smm } 1183248571Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 1184219089Spjd 1185248571Smm if (drc->drc_byteswap) { 1186248571Smm drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1187248571Smm drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 1188248571Smm drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1189248571Smm drrb->drr_type = BSWAP_32(drrb->drr_type); 1190248571Smm drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1191248571Smm drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1192248571Smm } 1193168404Spjd 1194248571Smm drba.drba_origin = origin; 1195248571Smm drba.drba_cookie = drc; 1196248571Smm drba.drba_cred = CRED(); 1197219089Spjd 1198248571Smm return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, 1199269006Sdelphij &drba, 5, ZFS_SPACE_CHECK_NORMAL)); 1200168404Spjd} 1201168404Spjd 1202185029Spjdstruct restorearg { 1203185029Spjd int err; 1204248571Smm boolean_t byteswap; 1205185029Spjd kthread_t *td; 1206185029Spjd struct file *fp; 1207185029Spjd char *buf; 1208185029Spjd uint64_t voff; 1209185029Spjd int bufsize; /* amount of memory allocated for buf */ 1210185029Spjd zio_cksum_t cksum; 1211219089Spjd avl_tree_t *guid_to_ds_map; 1212185029Spjd}; 1213185029Spjd 1214219089Spjdtypedef struct guid_map_entry { 1215219089Spjd uint64_t guid; 1216219089Spjd dsl_dataset_t *gme_ds; 1217219089Spjd avl_node_t avlnode; 1218219089Spjd} guid_map_entry_t; 1219219089Spjd 1220168404Spjdstatic int 1221219089Spjdguid_compare(const void *arg1, const void *arg2) 1222168404Spjd{ 1223219089Spjd const guid_map_entry_t *gmep1 = arg1; 1224219089Spjd const guid_map_entry_t *gmep2 = arg2; 1225219089Spjd 1226219089Spjd if (gmep1->guid < gmep2->guid) 1227219089Spjd return (-1); 1228219089Spjd else if (gmep1->guid > gmep2->guid) 1229219089Spjd return (1); 1230219089Spjd return (0); 1231219089Spjd} 1232219089Spjd 1233219089Spjdstatic void 1234219089Spjdfree_guid_map_onexit(void *arg) 1235219089Spjd{ 1236219089Spjd avl_tree_t *ca = arg; 1237219089Spjd void *cookie = NULL; 1238219089Spjd guid_map_entry_t *gmep; 1239219089Spjd 1240219089Spjd while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { 1241248571Smm dsl_dataset_long_rele(gmep->gme_ds, gmep); 1242249196Smm dsl_dataset_rele(gmep->gme_ds, gmep); 1243219089Spjd kmem_free(gmep, sizeof (guid_map_entry_t)); 1244219089Spjd } 1245219089Spjd avl_destroy(ca); 1246219089Spjd kmem_free(ca, sizeof (avl_tree_t)); 1247219089Spjd} 1248219089Spjd 1249219089Spjdstatic int 1250219089Spjdrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid) 1251219089Spjd{ 1252168404Spjd struct uio auio; 1253168404Spjd struct iovec aiov; 1254168404Spjd int error; 1255168404Spjd 1256168404Spjd aiov.iov_base = buf; 1257168404Spjd aiov.iov_len = len; 1258168404Spjd auio.uio_iov = &aiov; 1259168404Spjd auio.uio_iovcnt = 1; 1260168404Spjd auio.uio_resid = len; 1261169170Spjd auio.uio_segflg = UIO_SYSSPACE; 1262168404Spjd auio.uio_rw = UIO_READ; 1263168404Spjd auio.uio_offset = off; 1264168404Spjd auio.uio_td = ra->td; 1265168404Spjd#ifdef _KERNEL 1266168404Spjd error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 1267168404Spjd#else 1268168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 1269168404Spjd error = EOPNOTSUPP; 1270168404Spjd#endif 1271168404Spjd *resid = auio.uio_resid; 1272168404Spjd return (error); 1273168404Spjd} 1274168404Spjd 1275168404Spjdstatic void * 1276168404Spjdrestore_read(struct restorearg *ra, int len) 1277168404Spjd{ 1278168404Spjd void *rv; 1279185029Spjd int done = 0; 1280168404Spjd 1281168404Spjd /* some things will require 8-byte alignment, so everything must */ 1282240415Smm ASSERT0(len % 8); 1283168404Spjd 1284185029Spjd while (done < len) { 1285219089Spjd ssize_t resid; 1286168404Spjd 1287185029Spjd ra->err = restore_bytes(ra, (caddr_t)ra->buf + done, 1288185029Spjd len - done, ra->voff, &resid); 1289168404Spjd 1290185029Spjd if (resid == len - done) 1291249195Smm ra->err = SET_ERROR(EINVAL); 1292185029Spjd ra->voff += len - done - resid; 1293185029Spjd done = len - resid; 1294248571Smm if (ra->err != 0) 1295168404Spjd return (NULL); 1296168404Spjd } 1297168404Spjd 1298185029Spjd ASSERT3U(done, ==, len); 1299185029Spjd rv = ra->buf; 1300168404Spjd if (ra->byteswap) 1301185029Spjd fletcher_4_incremental_byteswap(rv, len, &ra->cksum); 1302168404Spjd else 1303185029Spjd fletcher_4_incremental_native(rv, len, &ra->cksum); 1304168404Spjd return (rv); 1305168404Spjd} 1306168404Spjd 1307168404Spjdstatic void 1308168404Spjdbackup_byteswap(dmu_replay_record_t *drr) 1309168404Spjd{ 1310168404Spjd#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 1311168404Spjd#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 1312168404Spjd drr->drr_type = BSWAP_32(drr->drr_type); 1313185029Spjd drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 1314168404Spjd switch (drr->drr_type) { 1315168404Spjd case DRR_BEGIN: 1316168404Spjd DO64(drr_begin.drr_magic); 1317219089Spjd DO64(drr_begin.drr_versioninfo); 1318168404Spjd DO64(drr_begin.drr_creation_time); 1319168404Spjd DO32(drr_begin.drr_type); 1320185029Spjd DO32(drr_begin.drr_flags); 1321168404Spjd DO64(drr_begin.drr_toguid); 1322168404Spjd DO64(drr_begin.drr_fromguid); 1323168404Spjd break; 1324168404Spjd case DRR_OBJECT: 1325168404Spjd DO64(drr_object.drr_object); 1326168404Spjd DO32(drr_object.drr_type); 1327168404Spjd DO32(drr_object.drr_bonustype); 1328168404Spjd DO32(drr_object.drr_blksz); 1329168404Spjd DO32(drr_object.drr_bonuslen); 1330219089Spjd DO64(drr_object.drr_toguid); 1331168404Spjd break; 1332168404Spjd case DRR_FREEOBJECTS: 1333168404Spjd DO64(drr_freeobjects.drr_firstobj); 1334168404Spjd DO64(drr_freeobjects.drr_numobjs); 1335219089Spjd DO64(drr_freeobjects.drr_toguid); 1336168404Spjd break; 1337168404Spjd case DRR_WRITE: 1338168404Spjd DO64(drr_write.drr_object); 1339168404Spjd DO32(drr_write.drr_type); 1340168404Spjd DO64(drr_write.drr_offset); 1341168404Spjd DO64(drr_write.drr_length); 1342219089Spjd DO64(drr_write.drr_toguid); 1343219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); 1344219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); 1345219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); 1346219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); 1347219089Spjd DO64(drr_write.drr_key.ddk_prop); 1348168404Spjd break; 1349219089Spjd case DRR_WRITE_BYREF: 1350219089Spjd DO64(drr_write_byref.drr_object); 1351219089Spjd DO64(drr_write_byref.drr_offset); 1352219089Spjd DO64(drr_write_byref.drr_length); 1353219089Spjd DO64(drr_write_byref.drr_toguid); 1354219089Spjd DO64(drr_write_byref.drr_refguid); 1355219089Spjd DO64(drr_write_byref.drr_refobject); 1356219089Spjd DO64(drr_write_byref.drr_refoffset); 1357219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); 1358219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); 1359219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); 1360219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); 1361219089Spjd DO64(drr_write_byref.drr_key.ddk_prop); 1362219089Spjd break; 1363268649Sdelphij case DRR_WRITE_EMBEDDED: 1364268649Sdelphij DO64(drr_write_embedded.drr_object); 1365268649Sdelphij DO64(drr_write_embedded.drr_offset); 1366268649Sdelphij DO64(drr_write_embedded.drr_length); 1367268649Sdelphij DO64(drr_write_embedded.drr_toguid); 1368268649Sdelphij DO32(drr_write_embedded.drr_lsize); 1369268649Sdelphij DO32(drr_write_embedded.drr_psize); 1370268649Sdelphij break; 1371168404Spjd case DRR_FREE: 1372168404Spjd DO64(drr_free.drr_object); 1373168404Spjd DO64(drr_free.drr_offset); 1374168404Spjd DO64(drr_free.drr_length); 1375219089Spjd DO64(drr_free.drr_toguid); 1376168404Spjd break; 1377219089Spjd case DRR_SPILL: 1378219089Spjd DO64(drr_spill.drr_object); 1379219089Spjd DO64(drr_spill.drr_length); 1380219089Spjd DO64(drr_spill.drr_toguid); 1381219089Spjd break; 1382168404Spjd case DRR_END: 1383168404Spjd DO64(drr_end.drr_checksum.zc_word[0]); 1384168404Spjd DO64(drr_end.drr_checksum.zc_word[1]); 1385168404Spjd DO64(drr_end.drr_checksum.zc_word[2]); 1386168404Spjd DO64(drr_end.drr_checksum.zc_word[3]); 1387219089Spjd DO64(drr_end.drr_toguid); 1388168404Spjd break; 1389168404Spjd } 1390168404Spjd#undef DO64 1391168404Spjd#undef DO32 1392168404Spjd} 1393168404Spjd 1394168404Spjdstatic int 1395168404Spjdrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1396168404Spjd{ 1397168404Spjd int err; 1398168404Spjd dmu_tx_t *tx; 1399200727Sdelphij void *data = NULL; 1400168404Spjd 1401168404Spjd if (drro->drr_type == DMU_OT_NONE || 1402236884Smm !DMU_OT_IS_VALID(drro->drr_type) || 1403236884Smm !DMU_OT_IS_VALID(drro->drr_bonustype) || 1404219089Spjd drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 1405168404Spjd drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1406168404Spjd P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1407168404Spjd drro->drr_blksz < SPA_MINBLOCKSIZE || 1408168404Spjd drro->drr_blksz > SPA_MAXBLOCKSIZE || 1409168404Spjd drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1410249195Smm return (SET_ERROR(EINVAL)); 1411168404Spjd } 1412168404Spjd 1413200726Sdelphij err = dmu_object_info(os, drro->drr_object, NULL); 1414168404Spjd 1415200726Sdelphij if (err != 0 && err != ENOENT) 1416249195Smm return (SET_ERROR(EINVAL)); 1417200726Sdelphij 1418201756Sdelphij if (drro->drr_bonuslen) { 1419201756Sdelphij data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); 1420248571Smm if (ra->err != 0) 1421201756Sdelphij return (ra->err); 1422201756Sdelphij } 1423201756Sdelphij 1424168404Spjd if (err == ENOENT) { 1425168404Spjd /* currently free, want to be allocated */ 1426200726Sdelphij tx = dmu_tx_create(os); 1427168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1428168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1429248571Smm if (err != 0) { 1430168404Spjd dmu_tx_abort(tx); 1431168404Spjd return (err); 1432168404Spjd } 1433168404Spjd err = dmu_object_claim(os, drro->drr_object, 1434168404Spjd drro->drr_type, drro->drr_blksz, 1435168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 1436200726Sdelphij dmu_tx_commit(tx); 1437168404Spjd } else { 1438168404Spjd /* currently allocated, want to be allocated */ 1439168404Spjd err = dmu_object_reclaim(os, drro->drr_object, 1440168404Spjd drro->drr_type, drro->drr_blksz, 1441200726Sdelphij drro->drr_bonustype, drro->drr_bonuslen); 1442168404Spjd } 1443248571Smm if (err != 0) { 1444249195Smm return (SET_ERROR(EINVAL)); 1445219089Spjd } 1446200726Sdelphij 1447200726Sdelphij tx = dmu_tx_create(os); 1448200726Sdelphij dmu_tx_hold_bonus(tx, drro->drr_object); 1449200726Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1450248571Smm if (err != 0) { 1451200726Sdelphij dmu_tx_abort(tx); 1452200726Sdelphij return (err); 1453168404Spjd } 1454168404Spjd 1455219089Spjd dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 1456219089Spjd tx); 1457168404Spjd dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1458168404Spjd 1459200727Sdelphij if (data != NULL) { 1460168404Spjd dmu_buf_t *db; 1461200727Sdelphij 1462168404Spjd VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1463168404Spjd dmu_buf_will_dirty(db, tx); 1464168404Spjd 1465185029Spjd ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 1466185029Spjd bcopy(data, db->db_data, drro->drr_bonuslen); 1467168404Spjd if (ra->byteswap) { 1468236884Smm dmu_object_byteswap_t byteswap = 1469236884Smm DMU_OT_BYTESWAP(drro->drr_bonustype); 1470236884Smm dmu_ot_byteswap[byteswap].ob_func(db->db_data, 1471168404Spjd drro->drr_bonuslen); 1472168404Spjd } 1473168404Spjd dmu_buf_rele(db, FTAG); 1474168404Spjd } 1475168404Spjd dmu_tx_commit(tx); 1476168404Spjd return (0); 1477168404Spjd} 1478168404Spjd 1479168404Spjd/* ARGSUSED */ 1480168404Spjdstatic int 1481168404Spjdrestore_freeobjects(struct restorearg *ra, objset_t *os, 1482168404Spjd struct drr_freeobjects *drrfo) 1483168404Spjd{ 1484168404Spjd uint64_t obj; 1485168404Spjd 1486168404Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1487249195Smm return (SET_ERROR(EINVAL)); 1488168404Spjd 1489168404Spjd for (obj = drrfo->drr_firstobj; 1490168404Spjd obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 1491168404Spjd (void) dmu_object_next(os, &obj, FALSE, 0)) { 1492168404Spjd int err; 1493168404Spjd 1494168404Spjd if (dmu_object_info(os, obj, NULL) != 0) 1495168404Spjd continue; 1496168404Spjd 1497254753Sdelphij err = dmu_free_long_object(os, obj); 1498248571Smm if (err != 0) 1499168404Spjd return (err); 1500168404Spjd } 1501168404Spjd return (0); 1502168404Spjd} 1503168404Spjd 1504168404Spjdstatic int 1505168404Spjdrestore_write(struct restorearg *ra, objset_t *os, 1506168404Spjd struct drr_write *drrw) 1507168404Spjd{ 1508168404Spjd dmu_tx_t *tx; 1509168404Spjd void *data; 1510168404Spjd int err; 1511168404Spjd 1512168404Spjd if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1513236884Smm !DMU_OT_IS_VALID(drrw->drr_type)) 1514249195Smm return (SET_ERROR(EINVAL)); 1515168404Spjd 1516168404Spjd data = restore_read(ra, drrw->drr_length); 1517168404Spjd if (data == NULL) 1518168404Spjd return (ra->err); 1519168404Spjd 1520168404Spjd if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1521249195Smm return (SET_ERROR(EINVAL)); 1522168404Spjd 1523168404Spjd tx = dmu_tx_create(os); 1524168404Spjd 1525168404Spjd dmu_tx_hold_write(tx, drrw->drr_object, 1526168404Spjd drrw->drr_offset, drrw->drr_length); 1527168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1528248571Smm if (err != 0) { 1529168404Spjd dmu_tx_abort(tx); 1530168404Spjd return (err); 1531168404Spjd } 1532236884Smm if (ra->byteswap) { 1533236884Smm dmu_object_byteswap_t byteswap = 1534236884Smm DMU_OT_BYTESWAP(drrw->drr_type); 1535236884Smm dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); 1536236884Smm } 1537168404Spjd dmu_write(os, drrw->drr_object, 1538168404Spjd drrw->drr_offset, drrw->drr_length, data, tx); 1539168404Spjd dmu_tx_commit(tx); 1540168404Spjd return (0); 1541168404Spjd} 1542168404Spjd 1543219089Spjd/* 1544219089Spjd * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 1545219089Spjd * streams to refer to a copy of the data that is already on the 1546219089Spjd * system because it came in earlier in the stream. This function 1547219089Spjd * finds the earlier copy of the data, and uses that copy instead of 1548219089Spjd * data from the stream to fulfill this write. 1549219089Spjd */ 1550219089Spjdstatic int 1551219089Spjdrestore_write_byref(struct restorearg *ra, objset_t *os, 1552219089Spjd struct drr_write_byref *drrwbr) 1553219089Spjd{ 1554219089Spjd dmu_tx_t *tx; 1555219089Spjd int err; 1556219089Spjd guid_map_entry_t gmesrch; 1557219089Spjd guid_map_entry_t *gmep; 1558268649Sdelphij avl_index_t where; 1559219089Spjd objset_t *ref_os = NULL; 1560219089Spjd dmu_buf_t *dbp; 1561219089Spjd 1562219089Spjd if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 1563249195Smm return (SET_ERROR(EINVAL)); 1564219089Spjd 1565219089Spjd /* 1566219089Spjd * If the GUID of the referenced dataset is different from the 1567219089Spjd * GUID of the target dataset, find the referenced dataset. 1568219089Spjd */ 1569219089Spjd if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 1570219089Spjd gmesrch.guid = drrwbr->drr_refguid; 1571219089Spjd if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, 1572219089Spjd &where)) == NULL) { 1573249195Smm return (SET_ERROR(EINVAL)); 1574219089Spjd } 1575219089Spjd if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 1576249195Smm return (SET_ERROR(EINVAL)); 1577219089Spjd } else { 1578219089Spjd ref_os = os; 1579219089Spjd } 1580219089Spjd 1581268649Sdelphij err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 1582268649Sdelphij drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); 1583268649Sdelphij if (err != 0) 1584219089Spjd return (err); 1585219089Spjd 1586219089Spjd tx = dmu_tx_create(os); 1587219089Spjd 1588219089Spjd dmu_tx_hold_write(tx, drrwbr->drr_object, 1589219089Spjd drrwbr->drr_offset, drrwbr->drr_length); 1590219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1591248571Smm if (err != 0) { 1592219089Spjd dmu_tx_abort(tx); 1593219089Spjd return (err); 1594219089Spjd } 1595219089Spjd dmu_write(os, drrwbr->drr_object, 1596219089Spjd drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 1597219089Spjd dmu_buf_rele(dbp, FTAG); 1598219089Spjd dmu_tx_commit(tx); 1599219089Spjd return (0); 1600219089Spjd} 1601219089Spjd 1602219089Spjdstatic int 1603268649Sdelphijrestore_write_embedded(struct restorearg *ra, objset_t *os, 1604268649Sdelphij struct drr_write_embedded *drrwnp) 1605268649Sdelphij{ 1606268649Sdelphij dmu_tx_t *tx; 1607268649Sdelphij int err; 1608268649Sdelphij void *data; 1609268649Sdelphij 1610268649Sdelphij if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset) 1611268649Sdelphij return (EINVAL); 1612268649Sdelphij 1613268649Sdelphij if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE) 1614268649Sdelphij return (EINVAL); 1615268649Sdelphij 1616268649Sdelphij if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES) 1617268649Sdelphij return (EINVAL); 1618268649Sdelphij if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) 1619268649Sdelphij return (EINVAL); 1620268649Sdelphij 1621268649Sdelphij data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8)); 1622268649Sdelphij if (data == NULL) 1623268649Sdelphij return (ra->err); 1624268649Sdelphij 1625268649Sdelphij tx = dmu_tx_create(os); 1626268649Sdelphij 1627268649Sdelphij dmu_tx_hold_write(tx, drrwnp->drr_object, 1628268649Sdelphij drrwnp->drr_offset, drrwnp->drr_length); 1629268649Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1630268649Sdelphij if (err != 0) { 1631268649Sdelphij dmu_tx_abort(tx); 1632268649Sdelphij return (err); 1633268649Sdelphij } 1634268649Sdelphij 1635268649Sdelphij dmu_write_embedded(os, drrwnp->drr_object, 1636268649Sdelphij drrwnp->drr_offset, data, drrwnp->drr_etype, 1637268649Sdelphij drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize, 1638268649Sdelphij ra->byteswap ^ ZFS_HOST_BYTEORDER, tx); 1639268649Sdelphij 1640268649Sdelphij dmu_tx_commit(tx); 1641268649Sdelphij return (0); 1642268649Sdelphij} 1643268649Sdelphij 1644268649Sdelphijstatic int 1645219089Spjdrestore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) 1646219089Spjd{ 1647219089Spjd dmu_tx_t *tx; 1648219089Spjd void *data; 1649219089Spjd dmu_buf_t *db, *db_spill; 1650219089Spjd int err; 1651219089Spjd 1652219089Spjd if (drrs->drr_length < SPA_MINBLOCKSIZE || 1653219089Spjd drrs->drr_length > SPA_MAXBLOCKSIZE) 1654249195Smm return (SET_ERROR(EINVAL)); 1655219089Spjd 1656219089Spjd data = restore_read(ra, drrs->drr_length); 1657219089Spjd if (data == NULL) 1658219089Spjd return (ra->err); 1659219089Spjd 1660219089Spjd if (dmu_object_info(os, drrs->drr_object, NULL) != 0) 1661249195Smm return (SET_ERROR(EINVAL)); 1662219089Spjd 1663219089Spjd VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); 1664219089Spjd if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { 1665219089Spjd dmu_buf_rele(db, FTAG); 1666219089Spjd return (err); 1667219089Spjd } 1668219089Spjd 1669219089Spjd tx = dmu_tx_create(os); 1670219089Spjd 1671219089Spjd dmu_tx_hold_spill(tx, db->db_object); 1672219089Spjd 1673219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1674248571Smm if (err != 0) { 1675219089Spjd dmu_buf_rele(db, FTAG); 1676219089Spjd dmu_buf_rele(db_spill, FTAG); 1677219089Spjd dmu_tx_abort(tx); 1678219089Spjd return (err); 1679219089Spjd } 1680219089Spjd dmu_buf_will_dirty(db_spill, tx); 1681219089Spjd 1682219089Spjd if (db_spill->db_size < drrs->drr_length) 1683219089Spjd VERIFY(0 == dbuf_spill_set_blksz(db_spill, 1684219089Spjd drrs->drr_length, tx)); 1685219089Spjd bcopy(data, db_spill->db_data, drrs->drr_length); 1686219089Spjd 1687219089Spjd dmu_buf_rele(db, FTAG); 1688219089Spjd dmu_buf_rele(db_spill, FTAG); 1689219089Spjd 1690219089Spjd dmu_tx_commit(tx); 1691219089Spjd return (0); 1692219089Spjd} 1693219089Spjd 1694168404Spjd/* ARGSUSED */ 1695168404Spjdstatic int 1696168404Spjdrestore_free(struct restorearg *ra, objset_t *os, 1697168404Spjd struct drr_free *drrf) 1698168404Spjd{ 1699168404Spjd int err; 1700168404Spjd 1701168404Spjd if (drrf->drr_length != -1ULL && 1702168404Spjd drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1703249195Smm return (SET_ERROR(EINVAL)); 1704168404Spjd 1705168404Spjd if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1706249195Smm return (SET_ERROR(EINVAL)); 1707168404Spjd 1708185029Spjd err = dmu_free_long_range(os, drrf->drr_object, 1709168404Spjd drrf->drr_offset, drrf->drr_length); 1710168404Spjd return (err); 1711168404Spjd} 1712168404Spjd 1713248571Smm/* used to destroy the drc_ds on error */ 1714248571Smmstatic void 1715248571Smmdmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) 1716248571Smm{ 1717248571Smm char name[MAXNAMELEN]; 1718248571Smm dsl_dataset_name(drc->drc_ds, name); 1719248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1720248571Smm (void) dsl_destroy_head(name); 1721248571Smm} 1722248571Smm 1723185029Spjd/* 1724185029Spjd * NB: callers *must* call dmu_recv_end() if this succeeds. 1725185029Spjd */ 1726168404Spjdint 1727219089Spjddmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, 1728219089Spjd int cleanup_fd, uint64_t *action_handlep) 1729168404Spjd{ 1730185029Spjd struct restorearg ra = { 0 }; 1731168404Spjd dmu_replay_record_t *drr; 1732185029Spjd objset_t *os; 1733185029Spjd zio_cksum_t pcksum; 1734219089Spjd int featureflags; 1735168404Spjd 1736248571Smm ra.byteswap = drc->drc_byteswap; 1737248571Smm ra.cksum = drc->drc_cksum; 1738219089Spjd ra.td = curthread; 1739185029Spjd ra.fp = fp; 1740185029Spjd ra.voff = *voffp; 1741185029Spjd ra.bufsize = 1<<20; 1742185029Spjd ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1743168404Spjd 1744185029Spjd /* these were verified in dmu_recv_begin */ 1745248571Smm ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, 1746219089Spjd DMU_SUBSTREAM); 1747248571Smm ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); 1748168404Spjd 1749168404Spjd /* 1750168404Spjd * Open the objset we are modifying. 1751168404Spjd */ 1752248571Smm VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); 1753168404Spjd 1754248571Smm ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 1755168404Spjd 1756219089Spjd featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 1757219089Spjd 1758219089Spjd /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 1759219089Spjd if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 1760219089Spjd minor_t minor; 1761219089Spjd 1762219089Spjd if (cleanup_fd == -1) { 1763249195Smm ra.err = SET_ERROR(EBADF); 1764219089Spjd goto out; 1765219089Spjd } 1766219089Spjd ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); 1767248571Smm if (ra.err != 0) { 1768219089Spjd cleanup_fd = -1; 1769219089Spjd goto out; 1770219089Spjd } 1771219089Spjd 1772219089Spjd if (*action_handlep == 0) { 1773219089Spjd ra.guid_to_ds_map = 1774219089Spjd kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 1775219089Spjd avl_create(ra.guid_to_ds_map, guid_compare, 1776219089Spjd sizeof (guid_map_entry_t), 1777219089Spjd offsetof(guid_map_entry_t, avlnode)); 1778219089Spjd ra.err = zfs_onexit_add_cb(minor, 1779219089Spjd free_guid_map_onexit, ra.guid_to_ds_map, 1780219089Spjd action_handlep); 1781248571Smm if (ra.err != 0) 1782219089Spjd goto out; 1783219089Spjd } else { 1784219089Spjd ra.err = zfs_onexit_cb_data(minor, *action_handlep, 1785219089Spjd (void **)&ra.guid_to_ds_map); 1786248571Smm if (ra.err != 0) 1787219089Spjd goto out; 1788219089Spjd } 1789221263Smm 1790221263Smm drc->drc_guid_to_ds_map = ra.guid_to_ds_map; 1791219089Spjd } 1792219089Spjd 1793168404Spjd /* 1794168404Spjd * Read records and process them. 1795168404Spjd */ 1796185029Spjd pcksum = ra.cksum; 1797168404Spjd while (ra.err == 0 && 1798168404Spjd NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1799185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) { 1800249195Smm ra.err = SET_ERROR(EINTR); 1801168404Spjd goto out; 1802168404Spjd } 1803168404Spjd 1804168404Spjd if (ra.byteswap) 1805168404Spjd backup_byteswap(drr); 1806168404Spjd 1807168404Spjd switch (drr->drr_type) { 1808168404Spjd case DRR_OBJECT: 1809168404Spjd { 1810168404Spjd /* 1811168404Spjd * We need to make a copy of the record header, 1812168404Spjd * because restore_{object,write} may need to 1813168404Spjd * restore_read(), which will invalidate drr. 1814168404Spjd */ 1815168404Spjd struct drr_object drro = drr->drr_u.drr_object; 1816168404Spjd ra.err = restore_object(&ra, os, &drro); 1817168404Spjd break; 1818168404Spjd } 1819168404Spjd case DRR_FREEOBJECTS: 1820168404Spjd { 1821168404Spjd struct drr_freeobjects drrfo = 1822168404Spjd drr->drr_u.drr_freeobjects; 1823168404Spjd ra.err = restore_freeobjects(&ra, os, &drrfo); 1824168404Spjd break; 1825168404Spjd } 1826168404Spjd case DRR_WRITE: 1827168404Spjd { 1828168404Spjd struct drr_write drrw = drr->drr_u.drr_write; 1829168404Spjd ra.err = restore_write(&ra, os, &drrw); 1830168404Spjd break; 1831168404Spjd } 1832219089Spjd case DRR_WRITE_BYREF: 1833219089Spjd { 1834219089Spjd struct drr_write_byref drrwbr = 1835219089Spjd drr->drr_u.drr_write_byref; 1836219089Spjd ra.err = restore_write_byref(&ra, os, &drrwbr); 1837219089Spjd break; 1838219089Spjd } 1839268649Sdelphij case DRR_WRITE_EMBEDDED: 1840268649Sdelphij { 1841268649Sdelphij struct drr_write_embedded drrwe = 1842268649Sdelphij drr->drr_u.drr_write_embedded; 1843268649Sdelphij ra.err = restore_write_embedded(&ra, os, &drrwe); 1844268649Sdelphij break; 1845268649Sdelphij } 1846168404Spjd case DRR_FREE: 1847168404Spjd { 1848168404Spjd struct drr_free drrf = drr->drr_u.drr_free; 1849168404Spjd ra.err = restore_free(&ra, os, &drrf); 1850168404Spjd break; 1851168404Spjd } 1852168404Spjd case DRR_END: 1853168404Spjd { 1854168404Spjd struct drr_end drre = drr->drr_u.drr_end; 1855168404Spjd /* 1856168404Spjd * We compare against the *previous* checksum 1857168404Spjd * value, because the stored checksum is of 1858168404Spjd * everything before the DRR_END record. 1859168404Spjd */ 1860185029Spjd if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1861249195Smm ra.err = SET_ERROR(ECKSUM); 1862168404Spjd goto out; 1863168404Spjd } 1864219089Spjd case DRR_SPILL: 1865219089Spjd { 1866219089Spjd struct drr_spill drrs = drr->drr_u.drr_spill; 1867219089Spjd ra.err = restore_spill(&ra, os, &drrs); 1868219089Spjd break; 1869219089Spjd } 1870168404Spjd default: 1871249195Smm ra.err = SET_ERROR(EINVAL); 1872168404Spjd goto out; 1873168404Spjd } 1874185029Spjd pcksum = ra.cksum; 1875168404Spjd } 1876185029Spjd ASSERT(ra.err != 0); 1877168404Spjd 1878168404Spjdout: 1879219089Spjd if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) 1880219089Spjd zfs_onexit_fd_rele(cleanup_fd); 1881168404Spjd 1882185029Spjd if (ra.err != 0) { 1883168404Spjd /* 1884219089Spjd * destroy what we created, so we don't leave it in the 1885219089Spjd * inconsistent restoring state. 1886168404Spjd */ 1887248571Smm dmu_recv_cleanup_ds(drc); 1888168404Spjd } 1889168404Spjd 1890168404Spjd kmem_free(ra.buf, ra.bufsize); 1891185029Spjd *voffp = ra.voff; 1892168404Spjd return (ra.err); 1893168404Spjd} 1894185029Spjd 1895185029Spjdstatic int 1896248571Smmdmu_recv_end_check(void *arg, dmu_tx_t *tx) 1897185029Spjd{ 1898248571Smm dmu_recv_cookie_t *drc = arg; 1899248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1900248571Smm int error; 1901185029Spjd 1902248571Smm ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); 1903248571Smm 1904248571Smm if (!drc->drc_newfs) { 1905248571Smm dsl_dataset_t *origin_head; 1906248571Smm 1907248571Smm error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); 1908248571Smm if (error != 0) 1909248571Smm return (error); 1910253820Sdelphij if (drc->drc_force) { 1911253820Sdelphij /* 1912253820Sdelphij * We will destroy any snapshots in tofs (i.e. before 1913253820Sdelphij * origin_head) that are after the origin (which is 1914253820Sdelphij * the snap before drc_ds, because drc_ds can not 1915253820Sdelphij * have any snaps of its own). 1916253820Sdelphij */ 1917253820Sdelphij uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1918253820Sdelphij while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1919253820Sdelphij dsl_dataset_t *snap; 1920253820Sdelphij error = dsl_dataset_hold_obj(dp, obj, FTAG, 1921253820Sdelphij &snap); 1922253820Sdelphij if (error != 0) 1923253820Sdelphij return (error); 1924253820Sdelphij if (snap->ds_dir != origin_head->ds_dir) 1925253820Sdelphij error = SET_ERROR(EINVAL); 1926253820Sdelphij if (error == 0) { 1927253820Sdelphij error = dsl_destroy_snapshot_check_impl( 1928253820Sdelphij snap, B_FALSE); 1929253820Sdelphij } 1930253820Sdelphij obj = snap->ds_phys->ds_prev_snap_obj; 1931253820Sdelphij dsl_dataset_rele(snap, FTAG); 1932253820Sdelphij if (error != 0) 1933253820Sdelphij return (error); 1934253820Sdelphij } 1935253820Sdelphij } 1936248571Smm error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, 1937253816Sdelphij origin_head, drc->drc_force, drc->drc_owner, tx); 1938248571Smm if (error != 0) { 1939248571Smm dsl_dataset_rele(origin_head, FTAG); 1940248571Smm return (error); 1941248571Smm } 1942248571Smm error = dsl_dataset_snapshot_check_impl(origin_head, 1943265744Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 1944248571Smm dsl_dataset_rele(origin_head, FTAG); 1945248571Smm if (error != 0) 1946248571Smm return (error); 1947248571Smm 1948248571Smm error = dsl_destroy_head_check_impl(drc->drc_ds, 1); 1949248571Smm } else { 1950248571Smm error = dsl_dataset_snapshot_check_impl(drc->drc_ds, 1951265744Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 1952248571Smm } 1953248571Smm return (error); 1954185029Spjd} 1955185029Spjd 1956185029Spjdstatic void 1957248571Smmdmu_recv_end_sync(void *arg, dmu_tx_t *tx) 1958185029Spjd{ 1959248571Smm dmu_recv_cookie_t *drc = arg; 1960248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1961185029Spjd 1962248571Smm spa_history_log_internal_ds(drc->drc_ds, "finish receiving", 1963248571Smm tx, "snap=%s", drc->drc_tosnap); 1964185029Spjd 1965248571Smm if (!drc->drc_newfs) { 1966248571Smm dsl_dataset_t *origin_head; 1967185029Spjd 1968248571Smm VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, 1969248571Smm &origin_head)); 1970253820Sdelphij 1971253820Sdelphij if (drc->drc_force) { 1972253820Sdelphij /* 1973253820Sdelphij * Destroy any snapshots of drc_tofs (origin_head) 1974253820Sdelphij * after the origin (the snap before drc_ds). 1975253820Sdelphij */ 1976253820Sdelphij uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1977253820Sdelphij while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1978253820Sdelphij dsl_dataset_t *snap; 1979253820Sdelphij VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, 1980253820Sdelphij &snap)); 1981253820Sdelphij ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir); 1982253820Sdelphij obj = snap->ds_phys->ds_prev_snap_obj; 1983253820Sdelphij dsl_destroy_snapshot_sync_impl(snap, 1984253820Sdelphij B_FALSE, tx); 1985253820Sdelphij dsl_dataset_rele(snap, FTAG); 1986253820Sdelphij } 1987253820Sdelphij } 1988253820Sdelphij VERIFY3P(drc->drc_ds->ds_prev, ==, 1989253820Sdelphij origin_head->ds_prev); 1990253820Sdelphij 1991248571Smm dsl_dataset_clone_swap_sync_impl(drc->drc_ds, 1992248571Smm origin_head, tx); 1993248571Smm dsl_dataset_snapshot_sync_impl(origin_head, 1994248571Smm drc->drc_tosnap, tx); 1995248571Smm 1996248571Smm /* set snapshot's creation time and guid */ 1997248571Smm dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); 1998248571Smm origin_head->ds_prev->ds_phys->ds_creation_time = 1999248571Smm drc->drc_drrb->drr_creation_time; 2000248571Smm origin_head->ds_prev->ds_phys->ds_guid = 2001248571Smm drc->drc_drrb->drr_toguid; 2002248571Smm origin_head->ds_prev->ds_phys->ds_flags &= 2003248571Smm ~DS_FLAG_INCONSISTENT; 2004248571Smm 2005248571Smm dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2006248571Smm origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 2007248571Smm 2008248571Smm dsl_dataset_rele(origin_head, FTAG); 2009248571Smm dsl_destroy_head_sync_impl(drc->drc_ds, tx); 2010253816Sdelphij 2011253816Sdelphij if (drc->drc_owner != NULL) 2012253816Sdelphij VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner); 2013248571Smm } else { 2014248571Smm dsl_dataset_t *ds = drc->drc_ds; 2015248571Smm 2016248571Smm dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); 2017248571Smm 2018248571Smm /* set snapshot's creation time and guid */ 2019248571Smm dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 2020248571Smm ds->ds_prev->ds_phys->ds_creation_time = 2021248571Smm drc->drc_drrb->drr_creation_time; 2022248571Smm ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; 2023248571Smm ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 2024248571Smm 2025248571Smm dmu_buf_will_dirty(ds->ds_dbuf, tx); 2026248571Smm ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 2027248571Smm } 2028248571Smm drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; 2029248571Smm /* 2030248571Smm * Release the hold from dmu_recv_begin. This must be done before 2031248571Smm * we return to open context, so that when we free the dataset's dnode, 2032248571Smm * we can evict its bonus buffer. 2033248571Smm */ 2034248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 2035248571Smm drc->drc_ds = NULL; 2036185029Spjd} 2037185029Spjd 2038219089Spjdstatic int 2039248571Smmadd_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) 2040221263Smm{ 2041248571Smm dsl_pool_t *dp; 2042221263Smm dsl_dataset_t *snapds; 2043221263Smm guid_map_entry_t *gmep; 2044221263Smm int err; 2045221263Smm 2046221263Smm ASSERT(guid_map != NULL); 2047221263Smm 2048248571Smm err = dsl_pool_hold(name, FTAG, &dp); 2049248571Smm if (err != 0) 2050248571Smm return (err); 2051249356Smm gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); 2052249196Smm err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); 2053221263Smm if (err == 0) { 2054221263Smm gmep->guid = snapds->ds_phys->ds_guid; 2055221263Smm gmep->gme_ds = snapds; 2056221263Smm avl_add(guid_map, gmep); 2057248571Smm dsl_dataset_long_hold(snapds, gmep); 2058249196Smm } else 2059249356Smm kmem_free(gmep, sizeof (*gmep)); 2060221263Smm 2061248571Smm dsl_pool_rele(dp, FTAG); 2062221263Smm return (err); 2063221263Smm} 2064221263Smm 2065248571Smmstatic int dmu_recv_end_modified_blocks = 3; 2066248571Smm 2067221263Smmstatic int 2068219089Spjddmu_recv_existing_end(dmu_recv_cookie_t *drc) 2069185029Spjd{ 2070248571Smm int error; 2071248571Smm char name[MAXNAMELEN]; 2072185029Spjd 2073248571Smm#ifdef _KERNEL 2074248571Smm /* 2075248571Smm * We will be destroying the ds; make sure its origin is unmounted if 2076248571Smm * necessary. 2077248571Smm */ 2078248571Smm dsl_dataset_name(drc->drc_ds, name); 2079248571Smm zfs_destroy_unmount_origin(name); 2080248571Smm#endif 2081185029Spjd 2082248571Smm error = dsl_sync_task(drc->drc_tofs, 2083248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 2084269006Sdelphij dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL); 2085185029Spjd 2086248571Smm if (error != 0) 2087248571Smm dmu_recv_cleanup_ds(drc); 2088248571Smm return (error); 2089185029Spjd} 2090219089Spjd 2091219089Spjdstatic int 2092219089Spjddmu_recv_new_end(dmu_recv_cookie_t *drc) 2093219089Spjd{ 2094248571Smm int error; 2095219089Spjd 2096248571Smm error = dsl_sync_task(drc->drc_tofs, 2097248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 2098269006Sdelphij dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL); 2099219089Spjd 2100248571Smm if (error != 0) { 2101248571Smm dmu_recv_cleanup_ds(drc); 2102248571Smm } else if (drc->drc_guid_to_ds_map != NULL) { 2103248571Smm (void) add_ds_to_guidmap(drc->drc_tofs, 2104248571Smm drc->drc_guid_to_ds_map, 2105248571Smm drc->drc_newsnapobj); 2106219089Spjd } 2107248571Smm return (error); 2108219089Spjd} 2109219089Spjd 2110219089Spjdint 2111253816Sdelphijdmu_recv_end(dmu_recv_cookie_t *drc, void *owner) 2112219089Spjd{ 2113253816Sdelphij drc->drc_owner = owner; 2114253816Sdelphij 2115248571Smm if (drc->drc_newfs) 2116248571Smm return (dmu_recv_new_end(drc)); 2117248571Smm else 2118219089Spjd return (dmu_recv_existing_end(drc)); 2119219089Spjd} 2120253821Sdelphij 2121253821Sdelphij/* 2122253821Sdelphij * Return TRUE if this objset is currently being received into. 2123253821Sdelphij */ 2124253821Sdelphijboolean_t 2125253821Sdelphijdmu_objset_is_receiving(objset_t *os) 2126253821Sdelphij{ 2127253821Sdelphij return (os->os_dsl_dataset != NULL && 2128253821Sdelphij os->os_dsl_dataset->ds_owner == dmu_recv_tag); 2129253821Sdelphij} 2130