1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright (c) 2012, 2020 by Delphix. All rights reserved. 24 * Copyright (c) 2013 Steven Hartland. All rights reserved. 25 * Copyright (c) 2017 Datto Inc. 26 * Copyright 2017 RackTop Systems. 27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. 28 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. 29 */ 30 31/* 32 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. 33 * It has the following characteristics: 34 * 35 * - Thread Safe. libzfs_core is accessible concurrently from multiple 36 * threads. This is accomplished primarily by avoiding global data 37 * (e.g. caching). Since it's thread-safe, there is no reason for a 38 * process to have multiple libzfs "instances". Therefore, we store 39 * our few pieces of data (e.g. the file descriptor) in global 40 * variables. The fd is reference-counted so that the libzfs_core 41 * library can be "initialized" multiple times (e.g. by different 42 * consumers within the same process). 43 * 44 * - Committed Interface. 
 *    The libzfs_core interface will be committed,
 *    therefore consumers can compile against it and be confident that
 *    their code will continue to work on future releases of this code.
 *    Currently, the interface is Evolving (not Committed), but we intend
 *    to commit to it once it is more complete and we determine that it
 *    meets the needs of all consumers.
 *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
 *    defined error numbers, and doesn't print anything to stdout/stderr.
 *
 *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
 *    to/from the kernel ioctls.  There is generally a 1:1 correspondence
 *    between libzfs_core functions and ioctls to ZFS_DEV.
 *
 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *    with kernel ioctls, and kernel ioctls are generally atomic, each
 *    libzfs_core function is atomic.  For example, creating multiple
 *    snapshots with a single call to lzc_snapshot() is atomic -- it
 *    can't fail with only some of the requested snapshots created, even
 *    in the event of power loss or system crash.
 *
 *  - Continued libzfs Support.  Some higher-level operations (e.g.
 *    support for "zfs send -R") are too complicated to fit the scope of
 *    libzfs_core.  This functionality will continue to live in libzfs.
 *    Where appropriate, libzfs will use the underlying atomic operations
 *    of libzfs_core.  For example, libzfs may implement "zfs send -R |
 *    zfs receive" by using individual "send one snapshot", rename,
 *    destroy, and "receive one snapshot" operations in libzfs_core.
 *    /sbin/zfs and /sbin/zpool will link with both libzfs and
 *    libzfs_core.  Other consumers should aim to use only libzfs_core,
 *    since that will be the supported, stable interface going forwards.
75 */ 76 77#include <libzfs_core.h> 78#include <ctype.h> 79#include <unistd.h> 80#include <stdlib.h> 81#include <string.h> 82#ifdef ZFS_DEBUG 83#include <stdio.h> 84#endif 85#include <errno.h> 86#include <fcntl.h> 87#include <pthread.h> 88#include <libzutil.h> 89#include <sys/nvpair.h> 90#include <sys/param.h> 91#include <sys/types.h> 92#include <sys/stat.h> 93#include <sys/zfs_ioctl.h> 94 95static int g_fd = -1; 96static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; 97static int g_refcount; 98 99#ifdef ZFS_DEBUG 100static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST; 101static zfs_errno_t fail_ioc_err; 102 103static void 104libzfs_core_debug_ioc(void) 105{ 106 /* 107 * To test running newer user space binaries with kernel's 108 * that don't yet support an ioctl or a new ioctl arg we 109 * provide an override to intentionally fail an ioctl. 110 * 111 * USAGE: 112 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err" 113 * 114 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a 115 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029" 116 * 117 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank" 118 * cannot checkpoint 'tank': the loaded zfs module does not support 119 * this operation. A reboot may be required to enable this operation. 
120 */ 121 if (fail_ioc_cmd == ZFS_IOC_LAST) { 122 char *ioc_test = getenv("ZFS_IOC_TEST"); 123 unsigned int ioc_num = 0, ioc_err = 0; 124 125 if (ioc_test != NULL && 126 sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 && 127 ioc_num < ZFS_IOC_LAST) { 128 fail_ioc_cmd = ioc_num; 129 fail_ioc_err = ioc_err; 130 } 131 } 132} 133#endif 134 135int 136libzfs_core_init(void) 137{ 138 (void) pthread_mutex_lock(&g_lock); 139 if (g_refcount == 0) { 140 g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC); 141 if (g_fd < 0) { 142 (void) pthread_mutex_unlock(&g_lock); 143 return (errno); 144 } 145 } 146 g_refcount++; 147 148#ifdef ZFS_DEBUG 149 libzfs_core_debug_ioc(); 150#endif 151 (void) pthread_mutex_unlock(&g_lock); 152 return (0); 153} 154 155void 156libzfs_core_fini(void) 157{ 158 (void) pthread_mutex_lock(&g_lock); 159 ASSERT3S(g_refcount, >, 0); 160 161 if (g_refcount > 0) 162 g_refcount--; 163 164 if (g_refcount == 0 && g_fd != -1) { 165 (void) close(g_fd); 166 g_fd = -1; 167 } 168 (void) pthread_mutex_unlock(&g_lock); 169} 170 171static int 172lzc_ioctl(zfs_ioc_t ioc, const char *name, 173 nvlist_t *source, nvlist_t **resultp) 174{ 175 zfs_cmd_t zc = {"\0"}; 176 int error = 0; 177 char *packed = NULL; 178 size_t size = 0; 179 180 ASSERT3S(g_refcount, >, 0); 181 VERIFY3S(g_fd, !=, -1); 182 183#ifdef ZFS_DEBUG 184 if (ioc == fail_ioc_cmd) 185 return (fail_ioc_err); 186#endif 187 188 if (name != NULL) 189 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 190 191 if (source != NULL) { 192 packed = fnvlist_pack(source, &size); 193 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 194 zc.zc_nvlist_src_size = size; 195 } 196 197 if (resultp != NULL) { 198 *resultp = NULL; 199 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) { 200 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source, 201 ZCP_ARG_MEMLIMIT); 202 } else { 203 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); 204 } 205 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 206 malloc(zc.zc_nvlist_dst_size); 207 if (zc.zc_nvlist_dst == 
(uint64_t)0) { 208 error = ENOMEM; 209 goto out; 210 } 211 } 212 213 while (zfs_ioctl_fd(g_fd, ioc, &zc) != 0) { 214 /* 215 * If ioctl exited with ENOMEM, we retry the ioctl after 216 * increasing the size of the destination nvlist. 217 * 218 * Channel programs that exit with ENOMEM ran over the 219 * lua memory sandbox; they should not be retried. 220 */ 221 if (errno == ENOMEM && resultp != NULL && 222 ioc != ZFS_IOC_CHANNEL_PROGRAM) { 223 free((void *)(uintptr_t)zc.zc_nvlist_dst); 224 zc.zc_nvlist_dst_size *= 2; 225 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 226 malloc(zc.zc_nvlist_dst_size); 227 if (zc.zc_nvlist_dst == (uint64_t)0) { 228 error = ENOMEM; 229 goto out; 230 } 231 } else { 232 error = errno; 233 break; 234 } 235 } 236 if (zc.zc_nvlist_dst_filled) { 237 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, 238 zc.zc_nvlist_dst_size); 239 } 240 241out: 242 if (packed != NULL) 243 fnvlist_pack_free(packed, size); 244 free((void *)(uintptr_t)zc.zc_nvlist_dst); 245 return (error); 246} 247 248int 249lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, 250 uint8_t *wkeydata, uint_t wkeylen) 251{ 252 int error; 253 nvlist_t *hidden_args = NULL; 254 nvlist_t *args = fnvlist_alloc(); 255 256 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); 257 if (props != NULL) 258 fnvlist_add_nvlist(args, "props", props); 259 260 if (wkeydata != NULL) { 261 hidden_args = fnvlist_alloc(); 262 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, 263 wkeylen); 264 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); 265 } 266 267 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); 268 nvlist_free(hidden_args); 269 nvlist_free(args); 270 return (error); 271} 272 273int 274lzc_clone(const char *fsname, const char *origin, nvlist_t *props) 275{ 276 int error; 277 nvlist_t *hidden_args = NULL; 278 nvlist_t *args = fnvlist_alloc(); 279 280 fnvlist_add_string(args, "origin", origin); 281 if (props != NULL) 282 
fnvlist_add_nvlist(args, "props", props); 283 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); 284 nvlist_free(hidden_args); 285 nvlist_free(args); 286 return (error); 287} 288 289int 290lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen) 291{ 292 /* 293 * The promote ioctl is still legacy, so we need to construct our 294 * own zfs_cmd_t rather than using lzc_ioctl(). 295 */ 296 zfs_cmd_t zc = {"\0"}; 297 298 ASSERT3S(g_refcount, >, 0); 299 VERIFY3S(g_fd, !=, -1); 300 301 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); 302 if (zfs_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) { 303 int error = errno; 304 if (error == EEXIST && snapnamebuf != NULL) 305 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen); 306 return (error); 307 } 308 return (0); 309} 310 311int 312lzc_rename(const char *source, const char *target) 313{ 314 zfs_cmd_t zc = {"\0"}; 315 int error; 316 317 ASSERT3S(g_refcount, >, 0); 318 VERIFY3S(g_fd, !=, -1); 319 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name)); 320 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); 321 error = zfs_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc); 322 if (error != 0) 323 error = errno; 324 return (error); 325} 326int 327lzc_destroy(const char *fsname) 328{ 329 int error; 330 nvlist_t *args = fnvlist_alloc(); 331 error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL); 332 nvlist_free(args); 333 return (error); 334} 335 336/* 337 * Creates snapshots. 338 * 339 * The keys in the snaps nvlist are the snapshots to be created. 340 * They must all be in the same pool. 341 * 342 * The props nvlist is properties to set. Currently only user properties 343 * are supported. { user:prop_name -> string value } 344 * 345 * The returned results nvlist will have an entry for each snapshot that failed. 346 * The value will be the (int32) error code. 347 * 348 * The return value will be 0 if all snapshots were created, otherwise it will 349 * be the errno of a (unspecified) snapshot that failed. 
350 */ 351int 352lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) 353{ 354 nvpair_t *elem; 355 nvlist_t *args; 356 int error; 357 char pool[ZFS_MAX_DATASET_NAME_LEN]; 358 359 *errlist = NULL; 360 361 /* determine the pool name */ 362 elem = nvlist_next_nvpair(snaps, NULL); 363 if (elem == NULL) 364 return (0); 365 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 366 pool[strcspn(pool, "/@")] = '\0'; 367 368 args = fnvlist_alloc(); 369 fnvlist_add_nvlist(args, "snaps", snaps); 370 if (props != NULL) 371 fnvlist_add_nvlist(args, "props", props); 372 373 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); 374 nvlist_free(args); 375 376 return (error); 377} 378 379/* 380 * Destroys snapshots. 381 * 382 * The keys in the snaps nvlist are the snapshots to be destroyed. 383 * They must all be in the same pool. 384 * 385 * Snapshots that do not exist will be silently ignored. 386 * 387 * If 'defer' is not set, and a snapshot has user holds or clones, the 388 * destroy operation will fail and none of the snapshots will be 389 * destroyed. 390 * 391 * If 'defer' is set, and a snapshot has user holds or clones, it will be 392 * marked for deferred destruction, and will be destroyed when the last hold 393 * or clone is removed/destroyed. 394 * 395 * The return value will be 0 if all snapshots were destroyed (or marked for 396 * later destruction if 'defer' is set) or didn't exist to begin with. 397 * 398 * Otherwise the return value will be the errno of a (unspecified) snapshot 399 * that failed, no snapshots will be destroyed, and the errlist will have an 400 * entry for each snapshot that failed. The value in the errlist will be 401 * the (int32) error code. 
402 */ 403int 404lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) 405{ 406 nvpair_t *elem; 407 nvlist_t *args; 408 int error; 409 char pool[ZFS_MAX_DATASET_NAME_LEN]; 410 411 /* determine the pool name */ 412 elem = nvlist_next_nvpair(snaps, NULL); 413 if (elem == NULL) 414 return (0); 415 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 416 pool[strcspn(pool, "/@")] = '\0'; 417 418 args = fnvlist_alloc(); 419 fnvlist_add_nvlist(args, "snaps", snaps); 420 if (defer) 421 fnvlist_add_boolean(args, "defer"); 422 423 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); 424 nvlist_free(args); 425 426 return (error); 427} 428 429int 430lzc_snaprange_space(const char *firstsnap, const char *lastsnap, 431 uint64_t *usedp) 432{ 433 nvlist_t *args; 434 nvlist_t *result; 435 int err; 436 char fs[ZFS_MAX_DATASET_NAME_LEN]; 437 char *atp; 438 439 /* determine the fs name */ 440 (void) strlcpy(fs, firstsnap, sizeof (fs)); 441 atp = strchr(fs, '@'); 442 if (atp == NULL) 443 return (EINVAL); 444 *atp = '\0'; 445 446 args = fnvlist_alloc(); 447 fnvlist_add_string(args, "firstsnap", firstsnap); 448 449 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); 450 nvlist_free(args); 451 if (err == 0) 452 *usedp = fnvlist_lookup_uint64(result, "used"); 453 fnvlist_free(result); 454 455 return (err); 456} 457 458boolean_t 459lzc_exists(const char *dataset) 460{ 461 /* 462 * The objset_stats ioctl is still legacy, so we need to construct our 463 * own zfs_cmd_t rather than using lzc_ioctl(). 464 */ 465 zfs_cmd_t zc = {"\0"}; 466 467 ASSERT3S(g_refcount, >, 0); 468 VERIFY3S(g_fd, !=, -1); 469 470 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 471 return (zfs_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); 472} 473 474/* 475 * outnvl is unused. 476 * It was added to preserve the function signature in case it is 477 * needed in the future. 
478 */ 479/*ARGSUSED*/ 480int 481lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) 482{ 483 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL)); 484} 485 486/* 487 * Create "user holds" on snapshots. If there is a hold on a snapshot, 488 * the snapshot can not be destroyed. (However, it can be marked for deletion 489 * by lzc_destroy_snaps(defer=B_TRUE).) 490 * 491 * The keys in the nvlist are snapshot names. 492 * The snapshots must all be in the same pool. 493 * The value is the name of the hold (string type). 494 * 495 * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL). 496 * In this case, when the cleanup_fd is closed (including on process 497 * termination), the holds will be released. If the system is shut down 498 * uncleanly, the holds will be released when the pool is next opened 499 * or imported. 500 * 501 * Holds for snapshots which don't exist will be skipped and have an entry 502 * added to errlist, but will not cause an overall failure. 503 * 504 * The return value will be 0 if all holds, for snapshots that existed, 505 * were successfully created. 506 * 507 * Otherwise the return value will be the errno of a (unspecified) hold that 508 * failed and no holds will be created. 509 * 510 * In all cases the errlist will have an entry for each hold that failed 511 * (name = snapshot), with its value being the error code (int32). 
512 */ 513int 514lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) 515{ 516 char pool[ZFS_MAX_DATASET_NAME_LEN]; 517 nvlist_t *args; 518 nvpair_t *elem; 519 int error; 520 521 /* determine the pool name */ 522 elem = nvlist_next_nvpair(holds, NULL); 523 if (elem == NULL) 524 return (0); 525 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 526 pool[strcspn(pool, "/@")] = '\0'; 527 528 args = fnvlist_alloc(); 529 fnvlist_add_nvlist(args, "holds", holds); 530 if (cleanup_fd != -1) 531 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); 532 533 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); 534 nvlist_free(args); 535 return (error); 536} 537 538/* 539 * Release "user holds" on snapshots. If the snapshot has been marked for 540 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have 541 * any clones, and all the user holds are removed, then the snapshot will be 542 * destroyed. 543 * 544 * The keys in the nvlist are snapshot names. 545 * The snapshots must all be in the same pool. 546 * The value is an nvlist whose keys are the holds to remove. 547 * 548 * Holds which failed to release because they didn't exist will have an entry 549 * added to errlist, but will not cause an overall failure. 550 * 551 * The return value will be 0 if the nvl holds was empty or all holds that 552 * existed, were successfully removed. 553 * 554 * Otherwise the return value will be the errno of a (unspecified) hold that 555 * failed to release and no holds will be released. 556 * 557 * In all cases the errlist will have an entry for each hold that failed to 558 * to release. 
559 */ 560int 561lzc_release(nvlist_t *holds, nvlist_t **errlist) 562{ 563 char pool[ZFS_MAX_DATASET_NAME_LEN]; 564 nvpair_t *elem; 565 566 /* determine the pool name */ 567 elem = nvlist_next_nvpair(holds, NULL); 568 if (elem == NULL) 569 return (0); 570 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 571 pool[strcspn(pool, "/@")] = '\0'; 572 573 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); 574} 575 576/* 577 * Retrieve list of user holds on the specified snapshot. 578 * 579 * On success, *holdsp will be set to an nvlist which the caller must free. 580 * The keys are the names of the holds, and the value is the creation time 581 * of the hold (uint64) in seconds since the epoch. 582 */ 583int 584lzc_get_holds(const char *snapname, nvlist_t **holdsp) 585{ 586 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp)); 587} 588 589/* 590 * Generate a zfs send stream for the specified snapshot and write it to 591 * the specified file descriptor. 592 * 593 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap") 594 * 595 * If "from" is NULL, a full (non-incremental) stream will be sent. 596 * If "from" is non-NULL, it must be the full name of a snapshot or 597 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or 598 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or 599 * bookmark must represent an earlier point in the history of "snapname"). 600 * It can be an earlier snapshot in the same filesystem or zvol as "snapname", 601 * or it can be the origin of "snapname"'s filesystem, or an earlier 602 * snapshot in the origin, etc. 603 * 604 * "fd" is the file descriptor to write the send stream to. 605 * 606 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted 607 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT 608 * records with drr_blksz > 128K. 
609 * 610 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted 611 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, 612 * which the receiving system must support (as indicated by support 613 * for the "embedded_data" feature). 614 * 615 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using 616 * compressed WRITE records for blocks which are compressed on disk and in 617 * memory. If the lz4_compress feature is active on the sending system, then 618 * the receiving system must have that feature enabled as well. 619 * 620 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted 621 * datasets, by sending data exactly as it exists on disk. This allows backups 622 * to be taken even if encryption keys are not currently loaded. 623 */ 624int 625lzc_send(const char *snapname, const char *from, int fd, 626 enum lzc_send_flags flags) 627{ 628 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, 629 NULL)); 630} 631 632int 633lzc_send_redacted(const char *snapname, const char *from, int fd, 634 enum lzc_send_flags flags, const char *redactbook) 635{ 636 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, 637 redactbook)); 638} 639 640int 641lzc_send_resume(const char *snapname, const char *from, int fd, 642 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) 643{ 644 return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj, 645 resumeoff, NULL)); 646} 647 648/* 649 * snapname: The name of the "tosnap", or the snapshot whose contents we are 650 * sending. 651 * from: The name of the "fromsnap", or the incremental source. 652 * fd: File descriptor to write the stream to. 653 * flags: flags that determine features to be used by the stream. 654 * resumeobj: Object to resume from, for resuming send 655 * resumeoff: Offset to resume from, for resuming send. 
656 * redactnv: nvlist of string -> boolean(ignored) containing the names of all 657 * the snapshots that we should redact with respect to. 658 * redactbook: Name of the redaction bookmark to create. 659 */ 660int 661lzc_send_resume_redacted(const char *snapname, const char *from, int fd, 662 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 663 const char *redactbook) 664{ 665 nvlist_t *args; 666 int err; 667 668 args = fnvlist_alloc(); 669 fnvlist_add_int32(args, "fd", fd); 670 if (from != NULL) 671 fnvlist_add_string(args, "fromsnap", from); 672 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 673 fnvlist_add_boolean(args, "largeblockok"); 674 if (flags & LZC_SEND_FLAG_EMBED_DATA) 675 fnvlist_add_boolean(args, "embedok"); 676 if (flags & LZC_SEND_FLAG_COMPRESS) 677 fnvlist_add_boolean(args, "compressok"); 678 if (flags & LZC_SEND_FLAG_RAW) 679 fnvlist_add_boolean(args, "rawok"); 680 if (flags & LZC_SEND_FLAG_SAVED) 681 fnvlist_add_boolean(args, "savedok"); 682 if (resumeobj != 0 || resumeoff != 0) { 683 fnvlist_add_uint64(args, "resume_object", resumeobj); 684 fnvlist_add_uint64(args, "resume_offset", resumeoff); 685 } 686 if (redactbook != NULL) 687 fnvlist_add_string(args, "redactbook", redactbook); 688 689 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); 690 nvlist_free(args); 691 return (err); 692} 693 694/* 695 * "from" can be NULL, a snapshot, or a bookmark. 696 * 697 * If from is NULL, a full (non-incremental) stream will be estimated. This 698 * is calculated very efficiently. 699 * 700 * If from is a snapshot, lzc_send_space uses the deadlists attached to 701 * each snapshot to efficiently estimate the stream size. 702 * 703 * If from is a bookmark, the indirect blocks in the destination snapshot 704 * are traversed, looking for blocks with a birth time since the creation TXG of 705 * the snapshot this bookmark was created from. 
This will result in 706 * significantly more I/O and be less efficient than a send space estimation on 707 * an equivalent snapshot. This process is also used if redact_snaps is 708 * non-null. 709 */ 710int 711lzc_send_space_resume_redacted(const char *snapname, const char *from, 712 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 713 uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep) 714{ 715 nvlist_t *args; 716 nvlist_t *result; 717 int err; 718 719 args = fnvlist_alloc(); 720 if (from != NULL) 721 fnvlist_add_string(args, "from", from); 722 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 723 fnvlist_add_boolean(args, "largeblockok"); 724 if (flags & LZC_SEND_FLAG_EMBED_DATA) 725 fnvlist_add_boolean(args, "embedok"); 726 if (flags & LZC_SEND_FLAG_COMPRESS) 727 fnvlist_add_boolean(args, "compressok"); 728 if (flags & LZC_SEND_FLAG_RAW) 729 fnvlist_add_boolean(args, "rawok"); 730 if (resumeobj != 0 || resumeoff != 0) { 731 fnvlist_add_uint64(args, "resume_object", resumeobj); 732 fnvlist_add_uint64(args, "resume_offset", resumeoff); 733 fnvlist_add_uint64(args, "bytes", resume_bytes); 734 } 735 if (redactbook != NULL) 736 fnvlist_add_string(args, "redactbook", redactbook); 737 if (fd != -1) 738 fnvlist_add_int32(args, "fd", fd); 739 740 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); 741 nvlist_free(args); 742 if (err == 0) 743 *spacep = fnvlist_lookup_uint64(result, "space"); 744 nvlist_free(result); 745 return (err); 746} 747 748int 749lzc_send_space(const char *snapname, const char *from, 750 enum lzc_send_flags flags, uint64_t *spacep) 751{ 752 return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0, 753 NULL, -1, spacep)); 754} 755 756static int 757recv_read(int fd, void *buf, int ilen) 758{ 759 char *cp = buf; 760 int rv; 761 int len = ilen; 762 763 do { 764 rv = read(fd, cp, len); 765 cp += rv; 766 len -= rv; 767 } while (rv > 0); 768 769 if (rv < 0 || len != 0) 770 return (EIO); 771 772 return 
(0); 773} 774 775/* 776 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the 777 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all 778 * stream options but is currently only used for resumable streams. This way 779 * updated user space utilities will interoperate with older kernel modules. 780 * 781 * Non-Linux OpenZFS platforms have opted to modify the legacy interface. 782 */ 783static int 784recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, 785 uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force, 786 boolean_t resumable, boolean_t raw, int input_fd, 787 const dmu_replay_record_t *begin_record, uint64_t *read_bytes, 788 uint64_t *errflags, nvlist_t **errors) 789{ 790 dmu_replay_record_t drr; 791 char fsname[MAXPATHLEN]; 792 char *atp; 793 int error; 794 boolean_t payload = B_FALSE; 795 796 ASSERT3S(g_refcount, >, 0); 797 VERIFY3S(g_fd, !=, -1); 798 799 /* Set 'fsname' to the name of containing filesystem */ 800 (void) strlcpy(fsname, snapname, sizeof (fsname)); 801 atp = strchr(fsname, '@'); 802 if (atp == NULL) 803 return (EINVAL); 804 *atp = '\0'; 805 806 /* If the fs does not exist, try its parent. */ 807 if (!lzc_exists(fsname)) { 808 char *slashp = strrchr(fsname, '/'); 809 if (slashp == NULL) 810 return (ENOENT); 811 *slashp = '\0'; 812 } 813 814 /* 815 * The begin_record is normally a non-byteswapped BEGIN record. 816 * For resumable streams it may be set to any non-byteswapped 817 * dmu_replay_record_t. 818 */ 819 if (begin_record == NULL) { 820 error = recv_read(input_fd, &drr, sizeof (drr)); 821 if (error != 0) 822 return (error); 823 } else { 824 drr = *begin_record; 825 payload = (begin_record->drr_payloadlen != 0); 826 } 827 828 /* 829 * All receives with a payload should use the new interface. 
830 */ 831 if (resumable || raw || wkeydata != NULL || payload) { 832 nvlist_t *outnvl = NULL; 833 nvlist_t *innvl = fnvlist_alloc(); 834 835 fnvlist_add_string(innvl, "snapname", snapname); 836 837 if (recvdprops != NULL) 838 fnvlist_add_nvlist(innvl, "props", recvdprops); 839 840 if (localprops != NULL) 841 fnvlist_add_nvlist(innvl, "localprops", localprops); 842 843 if (wkeydata != NULL) { 844 /* 845 * wkeydata must be placed in the special 846 * ZPOOL_HIDDEN_ARGS nvlist so that it 847 * will not be printed to the zpool history. 848 */ 849 nvlist_t *hidden_args = fnvlist_alloc(); 850 fnvlist_add_uint8_array(hidden_args, "wkeydata", 851 wkeydata, wkeylen); 852 fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS, 853 hidden_args); 854 nvlist_free(hidden_args); 855 } 856 857 if (origin != NULL && strlen(origin)) 858 fnvlist_add_string(innvl, "origin", origin); 859 860 fnvlist_add_byte_array(innvl, "begin_record", 861 (uchar_t *)&drr, sizeof (drr)); 862 863 fnvlist_add_int32(innvl, "input_fd", input_fd); 864 865 if (force) 866 fnvlist_add_boolean(innvl, "force"); 867 868 if (resumable) 869 fnvlist_add_boolean(innvl, "resumable"); 870 871 872 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl); 873 874 if (error == 0 && read_bytes != NULL) 875 error = nvlist_lookup_uint64(outnvl, "read_bytes", 876 read_bytes); 877 878 if (error == 0 && errflags != NULL) 879 error = nvlist_lookup_uint64(outnvl, "error_flags", 880 errflags); 881 882 if (error == 0 && errors != NULL) { 883 nvlist_t *nvl; 884 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl); 885 if (error == 0) 886 *errors = fnvlist_dup(nvl); 887 } 888 889 fnvlist_free(innvl); 890 fnvlist_free(outnvl); 891 } else { 892 zfs_cmd_t zc = {"\0"}; 893 char *packed = NULL; 894 size_t size; 895 896 ASSERT3S(g_refcount, >, 0); 897 898 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); 899 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); 900 901 if (recvdprops != NULL) { 902 packed = fnvlist_pack(recvdprops, 
&size); 903 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 904 zc.zc_nvlist_src_size = size; 905 } 906 907 if (localprops != NULL) { 908 packed = fnvlist_pack(localprops, &size); 909 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed; 910 zc.zc_nvlist_conf_size = size; 911 } 912 913 if (origin != NULL) 914 (void) strlcpy(zc.zc_string, origin, 915 sizeof (zc.zc_string)); 916 917 ASSERT3S(drr.drr_type, ==, DRR_BEGIN); 918 zc.zc_begin_record = drr.drr_u.drr_begin; 919 zc.zc_guid = force; 920 zc.zc_cookie = input_fd; 921 zc.zc_cleanup_fd = -1; 922 zc.zc_action_handle = 0; 923 924 zc.zc_nvlist_dst_size = 128 * 1024; 925 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 926 malloc(zc.zc_nvlist_dst_size); 927 928 error = zfs_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc); 929 if (error != 0) { 930 error = errno; 931 } else { 932 if (read_bytes != NULL) 933 *read_bytes = zc.zc_cookie; 934 935 if (errflags != NULL) 936 *errflags = zc.zc_obj; 937 938 if (errors != NULL) 939 VERIFY0(nvlist_unpack( 940 (void *)(uintptr_t)zc.zc_nvlist_dst, 941 zc.zc_nvlist_dst_size, errors, KM_SLEEP)); 942 } 943 944 if (packed != NULL) 945 fnvlist_pack_free(packed, size); 946 free((void *)(uintptr_t)zc.zc_nvlist_dst); 947 } 948 949 return (error); 950} 951 952/* 953 * The simplest receive case: receive from the specified fd, creating the 954 * specified snapshot. Apply the specified properties as "received" properties 955 * (which can be overridden by locally-set properties). If the stream is a 956 * clone, its origin snapshot must be specified by 'origin'. The 'force' 957 * flag will cause the target filesystem to be rolled back or destroyed if 958 * necessary to receive. 959 * 960 * Return 0 on success or an errno on failure. 961 * 962 * Note: this interface does not work on dedup'd streams 963 * (those with DMU_BACKUP_FEATURE_DEDUP). 
964 */ 965int 966lzc_receive(const char *snapname, nvlist_t *props, const char *origin, 967 boolean_t force, boolean_t raw, int fd) 968{ 969 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 970 B_FALSE, raw, fd, NULL, NULL, NULL, NULL)); 971} 972 973/* 974 * Like lzc_receive, but if the receive fails due to premature stream 975 * termination, the intermediate state will be preserved on disk. In this 976 * case, ECKSUM will be returned. The receive may subsequently be resumed 977 * with a resuming send stream generated by lzc_send_resume(). 978 */ 979int 980lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, 981 boolean_t force, boolean_t raw, int fd) 982{ 983 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 984 B_TRUE, raw, fd, NULL, NULL, NULL, NULL)); 985} 986 987/* 988 * Like lzc_receive, but allows the caller to read the begin record and then to 989 * pass it in. That could be useful if the caller wants to derive, for example, 990 * the snapname or the origin parameters based on the information contained in 991 * the begin record. 992 * The begin record must be in its original form as read from the stream, 993 * in other words, it should not be byteswapped. 994 * 995 * The 'resumable' parameter allows to obtain the same behavior as with 996 * lzc_receive_resumable. 997 */ 998int 999lzc_receive_with_header(const char *snapname, nvlist_t *props, 1000 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, 1001 int fd, const dmu_replay_record_t *begin_record) 1002{ 1003 if (begin_record == NULL) 1004 return (EINVAL); 1005 1006 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1007 resumable, raw, fd, begin_record, NULL, NULL, NULL)); 1008} 1009 1010/* 1011 * Like lzc_receive, but allows the caller to pass all supported arguments 1012 * and retrieve all values returned. 
The only additional input parameter 1013 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor. 1014 * 1015 * The following parameters all provide return values. Several may be set 1016 * in the failure case and will contain additional information. 1017 * 1018 * The 'read_bytes' value will be set to the total number of bytes read. 1019 * 1020 * The 'errflags' value will contain zprop_errflags_t flags which are 1021 * used to describe any failures. 1022 * 1023 * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored. 1024 * 1025 * The 'errors' nvlist contains an entry for each unapplied received 1026 * property. Callers are responsible for freeing this nvlist. 1027 */ 1028int lzc_receive_one(const char *snapname, nvlist_t *props, 1029 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, 1030 int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd, 1031 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, 1032 nvlist_t **errors) 1033{ 1034 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1035 resumable, raw, input_fd, begin_record, 1036 read_bytes, errflags, errors)); 1037} 1038 1039/* 1040 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops' 1041 * argument. 1042 * 1043 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and 1044 * exclude ('zfs receive -x') properties. 
Callers are responsible for freeing 1045 * this nvlist 1046 */ 1047int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props, 1048 nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin, 1049 boolean_t force, boolean_t resumable, boolean_t raw, int input_fd, 1050 const dmu_replay_record_t *begin_record, int cleanup_fd, 1051 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, 1052 nvlist_t **errors) 1053{ 1054 return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin, 1055 force, resumable, raw, input_fd, begin_record, 1056 read_bytes, errflags, errors)); 1057} 1058 1059/* 1060 * Roll back this filesystem or volume to its most recent snapshot. 1061 * If snapnamebuf is not NULL, it will be filled in with the name 1062 * of the most recent snapshot. 1063 * Note that the latest snapshot may change if a new one is concurrently 1064 * created or the current one is destroyed. lzc_rollback_to can be used 1065 * to roll back to a specific latest snapshot. 1066 * 1067 * Return 0 on success or an errno on failure. 1068 */ 1069int 1070lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) 1071{ 1072 nvlist_t *args; 1073 nvlist_t *result; 1074 int err; 1075 1076 args = fnvlist_alloc(); 1077 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 1078 nvlist_free(args); 1079 if (err == 0 && snapnamebuf != NULL) { 1080 const char *snapname = fnvlist_lookup_string(result, "target"); 1081 (void) strlcpy(snapnamebuf, snapname, snapnamelen); 1082 } 1083 nvlist_free(result); 1084 1085 return (err); 1086} 1087 1088/* 1089 * Roll back this filesystem or volume to the specified snapshot, 1090 * if possible. 1091 * 1092 * Return 0 on success or an errno on failure. 
1093 */ 1094int 1095lzc_rollback_to(const char *fsname, const char *snapname) 1096{ 1097 nvlist_t *args; 1098 nvlist_t *result; 1099 int err; 1100 1101 args = fnvlist_alloc(); 1102 fnvlist_add_string(args, "target", snapname); 1103 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 1104 nvlist_free(args); 1105 nvlist_free(result); 1106 return (err); 1107} 1108 1109/* 1110 * Creates new bookmarks from existing snapshot or bookmark. 1111 * 1112 * The bookmarks nvlist maps from the full name of the new bookmark to 1113 * the full name of the source snapshot or bookmark. 1114 * All the bookmarks and snapshots must be in the same pool. 1115 * The new bookmarks names must be unique. 1116 * => see function dsl_bookmark_create_nvl_validate 1117 * 1118 * The returned results nvlist will have an entry for each bookmark that failed. 1119 * The value will be the (int32) error code. 1120 * 1121 * The return value will be 0 if all bookmarks were created, otherwise it will 1122 * be the errno of a (undetermined) bookmarks that failed. 1123 */ 1124int 1125lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) 1126{ 1127 nvpair_t *elem; 1128 int error; 1129 char pool[ZFS_MAX_DATASET_NAME_LEN]; 1130 1131 /* determine pool name from first bookmark */ 1132 elem = nvlist_next_nvpair(bookmarks, NULL); 1133 if (elem == NULL) 1134 return (0); 1135 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 1136 pool[strcspn(pool, "/#")] = '\0'; 1137 1138 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist); 1139 1140 return (error); 1141} 1142 1143/* 1144 * Retrieve bookmarks. 1145 * 1146 * Retrieve the list of bookmarks for the given file system. The props 1147 * parameter is an nvlist of property names (with no values) that will be 1148 * returned for each bookmark. 
1149 * 1150 * The following are valid properties on bookmarks, most of which are numbers 1151 * (represented as uint64 in the nvlist), except redact_snaps, which is a 1152 * uint64 array, and redact_complete, which is a boolean 1153 * 1154 * "guid" - globally unique identifier of the snapshot it refers to 1155 * "createtxg" - txg when the snapshot it refers to was created 1156 * "creation" - timestamp when the snapshot it refers to was created 1157 * "ivsetguid" - IVset guid for identifying encrypted snapshots 1158 * "redact_snaps" - list of guids of the redaction snapshots for the specified 1159 * bookmark. If the bookmark is not a redaction bookmark, the nvlist will 1160 * not contain an entry for this value. If it is redacted with respect to 1161 * no snapshots, it will contain value -> NULL uint64 array 1162 * "redact_complete" - boolean value; true if the redaction bookmark is 1163 * complete, false otherwise. 1164 * 1165 * The format of the returned nvlist as follows: 1166 * <short name of bookmark> -> { 1167 * <name of property> -> { 1168 * "value" -> uint64 1169 * } 1170 * ... 1171 * "redact_snaps" -> { 1172 * "value" -> uint64 array 1173 * } 1174 * "redact_complete" -> { 1175 * "value" -> boolean value 1176 * } 1177 * } 1178 */ 1179int 1180lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) 1181{ 1182 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks)); 1183} 1184 1185/* 1186 * Get bookmark properties. 1187 * 1188 * Given a bookmark's full name, retrieve all properties for the bookmark. 1189 * 1190 * The format of the returned property list is as follows: 1191 * { 1192 * <name of property> -> { 1193 * "value" -> uint64 1194 * } 1195 * ... 
1196 * "redact_snaps" -> { 1197 * "value" -> uint64 array 1198 * } 1199 */ 1200int 1201lzc_get_bookmark_props(const char *bookmark, nvlist_t **props) 1202{ 1203 int error; 1204 1205 nvlist_t *innvl = fnvlist_alloc(); 1206 error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props); 1207 fnvlist_free(innvl); 1208 1209 return (error); 1210} 1211 1212/* 1213 * Destroys bookmarks. 1214 * 1215 * The keys in the bmarks nvlist are the bookmarks to be destroyed. 1216 * They must all be in the same pool. Bookmarks are specified as 1217 * <fs>#<bmark>. 1218 * 1219 * Bookmarks that do not exist will be silently ignored. 1220 * 1221 * The return value will be 0 if all bookmarks that existed were destroyed. 1222 * 1223 * Otherwise the return value will be the errno of a (undetermined) bookmark 1224 * that failed, no bookmarks will be destroyed, and the errlist will have an 1225 * entry for each bookmarks that failed. The value in the errlist will be 1226 * the (int32) error code. 1227 */ 1228int 1229lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) 1230{ 1231 nvpair_t *elem; 1232 int error; 1233 char pool[ZFS_MAX_DATASET_NAME_LEN]; 1234 1235 /* determine the pool name */ 1236 elem = nvlist_next_nvpair(bmarks, NULL); 1237 if (elem == NULL) 1238 return (0); 1239 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 1240 pool[strcspn(pool, "/#")] = '\0'; 1241 1242 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist); 1243 1244 return (error); 1245} 1246 1247static int 1248lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync, 1249 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1250{ 1251 int error; 1252 nvlist_t *args; 1253 1254 args = fnvlist_alloc(); 1255 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program); 1256 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl); 1257 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync); 1258 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit); 1259 
fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit); 1260 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl); 1261 fnvlist_free(args); 1262 1263 return (error); 1264} 1265 1266/* 1267 * Executes a channel program. 1268 * 1269 * If this function returns 0 the channel program was successfully loaded and 1270 * ran without failing. Note that individual commands the channel program ran 1271 * may have failed and the channel program is responsible for reporting such 1272 * errors through outnvl if they are important. 1273 * 1274 * This method may also return: 1275 * 1276 * EINVAL The program contains syntax errors, or an invalid memory or time 1277 * limit was given. No part of the channel program was executed. 1278 * If caused by syntax errors, 'outnvl' contains information about the 1279 * errors. 1280 * 1281 * ECHRNG The program was executed, but encountered a runtime error, such as 1282 * calling a function with incorrect arguments, invoking the error() 1283 * function directly, failing an assert() command, etc. Some portion 1284 * of the channel program may have executed and committed changes. 1285 * Information about the failure can be found in 'outnvl'. 1286 * 1287 * ENOMEM The program fully executed, but the output buffer was not large 1288 * enough to store the returned value. No output is returned through 1289 * 'outnvl'. 1290 * 1291 * ENOSPC The program was terminated because it exceeded its memory usage 1292 * limit. Some portion of the channel program may have executed and 1293 * committed changes to disk. No output is returned through 'outnvl'. 1294 * 1295 * ETIME The program was terminated because it exceeded its Lua instruction 1296 * limit. Some portion of the channel program may have executed and 1297 * committed changes to disk. No output is returned through 'outnvl'. 
1298 */ 1299int 1300lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit, 1301 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1302{ 1303 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit, 1304 memlimit, argnvl, outnvl)); 1305} 1306 1307/* 1308 * Creates a checkpoint for the specified pool. 1309 * 1310 * If this function returns 0 the pool was successfully checkpointed. 1311 * 1312 * This method may also return: 1313 * 1314 * ZFS_ERR_CHECKPOINT_EXISTS 1315 * The pool already has a checkpoint. A pools can only have one 1316 * checkpoint at most, at any given time. 1317 * 1318 * ZFS_ERR_DISCARDING_CHECKPOINT 1319 * ZFS is in the middle of discarding a checkpoint for this pool. 1320 * The pool can be checkpointed again once the discard is done. 1321 * 1322 * ZFS_DEVRM_IN_PROGRESS 1323 * A vdev is currently being removed. The pool cannot be 1324 * checkpointed until the device removal is done. 1325 * 1326 * ZFS_VDEV_TOO_BIG 1327 * One or more top-level vdevs exceed the maximum vdev size 1328 * supported for this feature. 1329 */ 1330int 1331lzc_pool_checkpoint(const char *pool) 1332{ 1333 int error; 1334 1335 nvlist_t *result = NULL; 1336 nvlist_t *args = fnvlist_alloc(); 1337 1338 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result); 1339 1340 fnvlist_free(args); 1341 fnvlist_free(result); 1342 1343 return (error); 1344} 1345 1346/* 1347 * Discard the checkpoint from the specified pool. 1348 * 1349 * If this function returns 0 the checkpoint was successfully discarded. 1350 * 1351 * This method may also return: 1352 * 1353 * ZFS_ERR_NO_CHECKPOINT 1354 * The pool does not have a checkpoint. 1355 * 1356 * ZFS_ERR_DISCARDING_CHECKPOINT 1357 * ZFS is already in the middle of discarding the checkpoint. 
1358 */ 1359int 1360lzc_pool_checkpoint_discard(const char *pool) 1361{ 1362 int error; 1363 1364 nvlist_t *result = NULL; 1365 nvlist_t *args = fnvlist_alloc(); 1366 1367 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result); 1368 1369 fnvlist_free(args); 1370 fnvlist_free(result); 1371 1372 return (error); 1373} 1374 1375/* 1376 * Executes a read-only channel program. 1377 * 1378 * A read-only channel program works programmatically the same way as a 1379 * normal channel program executed with lzc_channel_program(). The only 1380 * difference is it runs exclusively in open-context and therefore can 1381 * return faster. The downside to that, is that the program cannot change 1382 * on-disk state by calling functions from the zfs.sync submodule. 1383 * 1384 * The return values of this function (and their meaning) are exactly the 1385 * same as the ones described in lzc_channel_program(). 1386 */ 1387int 1388lzc_channel_program_nosync(const char *pool, const char *program, 1389 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1390{ 1391 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout, 1392 memlimit, argnvl, outnvl)); 1393} 1394 1395/* 1396 * Performs key management functions 1397 * 1398 * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to 1399 * load or change a wrapping key, the key should be specified in the 1400 * hidden_args nvlist so that it is not logged. 
1401 */ 1402int 1403lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata, 1404 uint_t wkeylen) 1405{ 1406 int error; 1407 nvlist_t *ioc_args; 1408 nvlist_t *hidden_args; 1409 1410 if (wkeydata == NULL) 1411 return (EINVAL); 1412 1413 ioc_args = fnvlist_alloc(); 1414 hidden_args = fnvlist_alloc(); 1415 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen); 1416 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); 1417 if (noop) 1418 fnvlist_add_boolean(ioc_args, "noop"); 1419 error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL); 1420 nvlist_free(hidden_args); 1421 nvlist_free(ioc_args); 1422 1423 return (error); 1424} 1425 1426int 1427lzc_unload_key(const char *fsname) 1428{ 1429 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL)); 1430} 1431 1432int 1433lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props, 1434 uint8_t *wkeydata, uint_t wkeylen) 1435{ 1436 int error; 1437 nvlist_t *ioc_args = fnvlist_alloc(); 1438 nvlist_t *hidden_args = NULL; 1439 1440 fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd); 1441 1442 if (wkeydata != NULL) { 1443 hidden_args = fnvlist_alloc(); 1444 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, 1445 wkeylen); 1446 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); 1447 } 1448 1449 if (props != NULL) 1450 fnvlist_add_nvlist(ioc_args, "props", props); 1451 1452 error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL); 1453 nvlist_free(hidden_args); 1454 nvlist_free(ioc_args); 1455 1456 return (error); 1457} 1458 1459int 1460lzc_reopen(const char *pool_name, boolean_t scrub_restart) 1461{ 1462 nvlist_t *args = fnvlist_alloc(); 1463 int error; 1464 1465 fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart); 1466 1467 error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL); 1468 nvlist_free(args); 1469 return (error); 1470} 1471 1472/* 1473 * Changes initializing state. 
1474 * 1475 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. 1476 * The key is ignored. 1477 * 1478 * If there are errors related to vdev arguments, per-vdev errors are returned 1479 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where 1480 * guid is stringified with PRIu64, and errno is one of the following as 1481 * an int64_t: 1482 * - ENODEV if the device was not found 1483 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing) 1484 * - EROFS if the device is not writeable 1485 * - EBUSY start requested but the device is already being either 1486 * initialized or trimmed 1487 * - ESRCH cancel/suspend requested but device is not being initialized 1488 * 1489 * If the errlist is empty, then return value will be: 1490 * - EINVAL if one or more arguments was invalid 1491 * - Other spa_open failures 1492 * - 0 if the operation succeeded 1493 */ 1494int 1495lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type, 1496 nvlist_t *vdevs, nvlist_t **errlist) 1497{ 1498 int error; 1499 1500 nvlist_t *args = fnvlist_alloc(); 1501 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type); 1502 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs); 1503 1504 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist); 1505 1506 fnvlist_free(args); 1507 1508 return (error); 1509} 1510 1511/* 1512 * Changes TRIM state. 1513 * 1514 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. 1515 * The key is ignored. 1516 * 1517 * If there are errors related to vdev arguments, per-vdev errors are returned 1518 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where 1519 * guid is stringified with PRIu64, and errno is one of the following as 1520 * an int64_t: 1521 * - ENODEV if the device was not found 1522 * - EINVAL if the devices is not a leaf or is not concrete (e.g. 
missing)
 * - EROFS if the device is not writeable
 * - EBUSY start requested but the device is already being either trimmed
 *   or initialized
 * - ESRCH cancel/suspend requested but device is not being trimmed
 * - EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
 *
 * If the errlist is empty, then return value will be:
 * - EINVAL if one or more arguments was invalid
 * - Other spa_open failures
 * - 0 if the operation succeeded
 */
int
lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
    boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
{
	nvlist_t *req = fnvlist_alloc();
	int rc;

	/* Command, vdev list, rate and secure flag, in that order. */
	fnvlist_add_uint64(req, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
	fnvlist_add_nvlist(req, ZPOOL_TRIM_VDEVS, vdevs);
	fnvlist_add_uint64(req, ZPOOL_TRIM_RATE, rate);
	fnvlist_add_boolean_value(req, ZPOOL_TRIM_SECURE, secure);

	rc = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, req, errlist);
	fnvlist_free(req);

	return (rc);
}

/*
 * Create a redaction bookmark named bookname by redacting snapshot with respect
 * to all the snapshots in snapnv.
1556 */ 1557int 1558lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv) 1559{ 1560 nvlist_t *args = fnvlist_alloc(); 1561 fnvlist_add_string(args, "bookname", bookname); 1562 fnvlist_add_nvlist(args, "snapnv", snapnv); 1563 int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL); 1564 fnvlist_free(args); 1565 return (error); 1566} 1567 1568static int 1569wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag, 1570 uint64_t tag, boolean_t *waited) 1571{ 1572 nvlist_t *args = fnvlist_alloc(); 1573 nvlist_t *result = NULL; 1574 1575 fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity); 1576 if (use_tag) 1577 fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag); 1578 1579 int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result); 1580 1581 if (error == 0 && waited != NULL) 1582 *waited = fnvlist_lookup_boolean_value(result, 1583 ZPOOL_WAIT_WAITED); 1584 1585 fnvlist_free(args); 1586 fnvlist_free(result); 1587 1588 return (error); 1589} 1590 1591int 1592lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited) 1593{ 1594 return (wait_common(pool, activity, B_FALSE, 0, waited)); 1595} 1596 1597int 1598lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag, 1599 boolean_t *waited) 1600{ 1601 return (wait_common(pool, activity, B_TRUE, tag, waited)); 1602} 1603 1604int 1605lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited) 1606{ 1607 nvlist_t *args = fnvlist_alloc(); 1608 nvlist_t *result = NULL; 1609 1610 fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity); 1611 1612 int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result); 1613 1614 if (error == 0 && waited != NULL) 1615 *waited = fnvlist_lookup_boolean_value(result, 1616 ZFS_WAIT_WAITED); 1617 1618 fnvlist_free(args); 1619 fnvlist_free(result); 1620 1621 return (error); 1622} 1623 1624/* 1625 * Set the bootenv contents for the given pool. 
1626 */ 1627int 1628lzc_set_bootenv(const char *pool, const nvlist_t *env) 1629{ 1630 return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL)); 1631} 1632 1633/* 1634 * Get the contents of the bootenv of the given pool. 1635 */ 1636int 1637lzc_get_bootenv(const char *pool, nvlist_t **outnvl) 1638{ 1639 return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl)); 1640} 1641