zvol.c revision 325132
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * 24 * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 25 * All rights reserved. 26 * 27 * Portions Copyright 2010 Robert Milkowski 28 * 29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 30 * Copyright (c) 2012, 2017 by Delphix. All rights reserved. 31 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 32 * Copyright (c) 2014 Integros [integros.com] 33 */ 34 35/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 36 37/* 38 * ZFS volume emulation driver. 39 * 40 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 41 * Volumes are accessed through the symbolic links named: 42 * 43 * /dev/zvol/dsk/<pool_name>/<dataset_name> 44 * /dev/zvol/rdsk/<pool_name>/<dataset_name> 45 * 46 * These links are created by the /dev filesystem (sdev_zvolops.c). 47 * Volumes are persistent through reboot. No user command needs to be 48 * run before opening and using a device. 49 * 50 * FreeBSD notes. 51 * On FreeBSD ZVOLs are simply GEOM providers like any other storage device 52 * in the system. 
53 */ 54 55#include <sys/types.h> 56#include <sys/param.h> 57#include <sys/kernel.h> 58#include <sys/errno.h> 59#include <sys/uio.h> 60#include <sys/bio.h> 61#include <sys/buf.h> 62#include <sys/kmem.h> 63#include <sys/conf.h> 64#include <sys/cmn_err.h> 65#include <sys/stat.h> 66#include <sys/zap.h> 67#include <sys/spa.h> 68#include <sys/spa_impl.h> 69#include <sys/zio.h> 70#include <sys/disk.h> 71#include <sys/dmu_traverse.h> 72#include <sys/dnode.h> 73#include <sys/dsl_dataset.h> 74#include <sys/dsl_prop.h> 75#include <sys/dkio.h> 76#include <sys/byteorder.h> 77#include <sys/sunddi.h> 78#include <sys/dirent.h> 79#include <sys/policy.h> 80#include <sys/queue.h> 81#include <sys/fs/zfs.h> 82#include <sys/zfs_ioctl.h> 83#include <sys/zil.h> 84#include <sys/refcount.h> 85#include <sys/zfs_znode.h> 86#include <sys/zfs_rlock.h> 87#include <sys/vdev_impl.h> 88#include <sys/vdev_raidz.h> 89#include <sys/zvol.h> 90#include <sys/zil_impl.h> 91#include <sys/dbuf.h> 92#include <sys/dmu_tx.h> 93#include <sys/zfeature.h> 94#include <sys/zio_checksum.h> 95#include <sys/zil_impl.h> 96#include <sys/filio.h> 97 98#include <geom/geom.h> 99 100#include "zfs_namecheck.h" 101 102#ifndef illumos 103struct g_class zfs_zvol_class = { 104 .name = "ZFS::ZVOL", 105 .version = G_VERSION, 106}; 107 108DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); 109 110#endif 111void *zfsdev_state; 112static char *zvol_tag = "zvol_tag"; 113 114#define ZVOL_DUMPSIZE "dumpsize" 115 116/* 117 * This lock protects the zfsdev_state structure from being modified 118 * while it's being used, e.g. an open that comes in before a create 119 * finishes. It also protects temporary opens of the dataset so that, 120 * e.g., an open doesn't get a spurious EBUSY. 121 */ 122#ifdef illumos 123kmutex_t zfsdev_state_lock; 124#else 125/* 126 * In FreeBSD we've replaced the upstream zfsdev_state_lock with the 127 * spa_namespace_lock in the ZVOL code. 
128 */ 129#define zfsdev_state_lock spa_namespace_lock 130#endif 131static uint32_t zvol_minors; 132 133#ifndef illumos 134SYSCTL_DECL(_vfs_zfs); 135SYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME"); 136static int volmode = ZFS_VOLMODE_GEOM; 137SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &volmode, 0, 138 "Expose as GEOM providers (1), device files (2) or neither"); 139static boolean_t zpool_on_zvol = B_FALSE; 140SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, recursive, CTLFLAG_RWTUN, &zpool_on_zvol, 0, 141 "Allow zpools to use zvols as vdevs (DANGEROUS)"); 142 143#endif 144typedef struct zvol_extent { 145 list_node_t ze_node; 146 dva_t ze_dva; /* dva associated with this extent */ 147 uint64_t ze_nblks; /* number of blocks in extent */ 148} zvol_extent_t; 149 150/* 151 * The in-core state of each volume. 152 */ 153typedef struct zvol_state { 154#ifndef illumos 155 LIST_ENTRY(zvol_state) zv_links; 156#endif 157 char zv_name[MAXPATHLEN]; /* pool/dd name */ 158 uint64_t zv_volsize; /* amount of space we advertise */ 159 uint64_t zv_volblocksize; /* volume block size */ 160#ifdef illumos 161 minor_t zv_minor; /* minor number */ 162#else 163 struct cdev *zv_dev; /* non-GEOM device */ 164 struct g_provider *zv_provider; /* GEOM provider */ 165#endif 166 uint8_t zv_min_bs; /* minimum addressable block shift */ 167 uint8_t zv_flags; /* readonly, dumpified, etc. 
*/ 168 objset_t *zv_objset; /* objset handle */ 169#ifdef illumos 170 uint32_t zv_open_count[OTYPCNT]; /* open counts */ 171#endif 172 uint32_t zv_total_opens; /* total open count */ 173 uint32_t zv_sync_cnt; /* synchronous open count */ 174 zilog_t *zv_zilog; /* ZIL handle */ 175 list_t zv_extents; /* List of extents for dump */ 176 znode_t zv_znode; /* for range locking */ 177 dmu_buf_t *zv_dbuf; /* bonus handle */ 178#ifndef illumos 179 int zv_state; 180 int zv_volmode; /* Provide GEOM or cdev */ 181 struct bio_queue_head zv_queue; 182 struct mtx zv_queue_mtx; /* zv_queue mutex */ 183#endif 184} zvol_state_t; 185 186#ifndef illumos 187static LIST_HEAD(, zvol_state) all_zvols; 188#endif 189/* 190 * zvol specific flags 191 */ 192#define ZVOL_RDONLY 0x1 193#define ZVOL_DUMPIFIED 0x2 194#define ZVOL_EXCL 0x4 195#define ZVOL_WCE 0x8 196 197/* 198 * zvol maximum transfer in one DMU tx. 199 */ 200int zvol_maxphys = DMU_MAX_ACCESS/2; 201 202/* 203 * Toggle unmap functionality. 204 */ 205boolean_t zvol_unmap_enabled = B_TRUE; 206 207/* 208 * If true, unmaps requested as synchronous are executed synchronously, 209 * otherwise all unmaps are asynchronous. 
210 */ 211boolean_t zvol_unmap_sync_enabled = B_FALSE; 212 213#ifndef illumos 214SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, unmap_enabled, CTLFLAG_RWTUN, 215 &zvol_unmap_enabled, 0, 216 "Enable UNMAP functionality"); 217 218SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, unmap_sync_enabled, CTLFLAG_RWTUN, 219 &zvol_unmap_sync_enabled, 0, 220 "UNMAPs requested as sync are executed synchronously"); 221 222static d_open_t zvol_d_open; 223static d_close_t zvol_d_close; 224static d_read_t zvol_read; 225static d_write_t zvol_write; 226static d_ioctl_t zvol_d_ioctl; 227static d_strategy_t zvol_strategy; 228 229static struct cdevsw zvol_cdevsw = { 230 .d_version = D_VERSION, 231 .d_open = zvol_d_open, 232 .d_close = zvol_d_close, 233 .d_read = zvol_read, 234 .d_write = zvol_write, 235 .d_ioctl = zvol_d_ioctl, 236 .d_strategy = zvol_strategy, 237 .d_name = "zvol", 238 .d_flags = D_DISK | D_TRACKCLOSE, 239}; 240 241static void zvol_geom_run(zvol_state_t *zv); 242static void zvol_geom_destroy(zvol_state_t *zv); 243static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); 244static void zvol_geom_start(struct bio *bp); 245static void zvol_geom_worker(void *arg); 246static void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, 247 uint64_t len, boolean_t sync); 248#endif /* !illumos */ 249 250extern int zfs_set_prop_nvlist(const char *, zprop_source_t, 251 nvlist_t *, nvlist_t *); 252static int zvol_remove_zv(zvol_state_t *); 253static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, 254 struct lwb *lwb, zio_t *zio); 255static int zvol_dumpify(zvol_state_t *zv); 256static int zvol_dump_fini(zvol_state_t *zv); 257static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); 258 259static void 260zvol_size_changed(zvol_state_t *zv, uint64_t volsize) 261{ 262#ifdef illumos 263 dev_t dev = makedevice(ddi_driver_major(zfs_dip), zv->zv_minor); 264 265 zv->zv_volsize = volsize; 266 VERIFY(ddi_prop_update_int64(dev, zfs_dip, 267 "Size", volsize) == DDI_SUCCESS); 
268 VERIFY(ddi_prop_update_int64(dev, zfs_dip, 269 "Nblocks", lbtodb(volsize)) == DDI_SUCCESS); 270 271 /* Notify specfs to invalidate the cached size */ 272 spec_size_invalidate(dev, VBLK); 273 spec_size_invalidate(dev, VCHR); 274#else /* !illumos */ 275 zv->zv_volsize = volsize; 276 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 277 struct g_provider *pp; 278 279 pp = zv->zv_provider; 280 if (pp == NULL) 281 return; 282 g_topology_lock(); 283 284 /* 285 * Do not invoke resize event when initial size was zero. 286 * ZVOL initializes the size on first open, this is not 287 * real resizing. 288 */ 289 if (pp->mediasize == 0) 290 pp->mediasize = zv->zv_volsize; 291 else 292 g_resize_provider(pp, zv->zv_volsize); 293 g_topology_unlock(); 294 } 295#endif /* illumos */ 296} 297 298int 299zvol_check_volsize(uint64_t volsize, uint64_t blocksize) 300{ 301 if (volsize == 0) 302 return (SET_ERROR(EINVAL)); 303 304 if (volsize % blocksize != 0) 305 return (SET_ERROR(EINVAL)); 306 307#ifdef _ILP32 308 if (volsize - 1 > SPEC_MAXOFFSET_T) 309 return (SET_ERROR(EOVERFLOW)); 310#endif 311 return (0); 312} 313 314int 315zvol_check_volblocksize(uint64_t volblocksize) 316{ 317 if (volblocksize < SPA_MINBLOCKSIZE || 318 volblocksize > SPA_OLD_MAXBLOCKSIZE || 319 !ISP2(volblocksize)) 320 return (SET_ERROR(EDOM)); 321 322 return (0); 323} 324 325int 326zvol_get_stats(objset_t *os, nvlist_t *nv) 327{ 328 int error; 329 dmu_object_info_t doi; 330 uint64_t val; 331 332 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); 333 if (error) 334 return (error); 335 336 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); 337 338 error = dmu_object_info(os, ZVOL_OBJ, &doi); 339 340 if (error == 0) { 341 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, 342 doi.doi_data_block_size); 343 } 344 345 return (error); 346} 347 348static zvol_state_t * 349zvol_minor_lookup(const char *name) 350{ 351#ifdef illumos 352 minor_t minor; 353#endif 354 zvol_state_t *zv; 355 356 
ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 357 358#ifdef illumos 359 for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { 360 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 361 if (zv == NULL) 362 continue; 363#else 364 LIST_FOREACH(zv, &all_zvols, zv_links) { 365#endif 366 if (strcmp(zv->zv_name, name) == 0) 367 return (zv); 368 } 369 370 return (NULL); 371} 372 373/* extent mapping arg */ 374struct maparg { 375 zvol_state_t *ma_zv; 376 uint64_t ma_blks; 377}; 378 379/*ARGSUSED*/ 380static int 381zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 382 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 383{ 384 struct maparg *ma = arg; 385 zvol_extent_t *ze; 386 int bs = ma->ma_zv->zv_volblocksize; 387 388 if (bp == NULL || BP_IS_HOLE(bp) || 389 zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) 390 return (0); 391 392 VERIFY(!BP_IS_EMBEDDED(bp)); 393 394 VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); 395 ma->ma_blks++; 396 397 /* Abort immediately if we have encountered gang blocks */ 398 if (BP_IS_GANG(bp)) 399 return (SET_ERROR(EFRAGS)); 400 401 /* 402 * See if the block is at the end of the previous extent. 
403 */ 404 ze = list_tail(&ma->ma_zv->zv_extents); 405 if (ze && 406 DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && 407 DVA_GET_OFFSET(BP_IDENTITY(bp)) == 408 DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { 409 ze->ze_nblks++; 410 return (0); 411 } 412 413 dprintf_bp(bp, "%s", "next blkptr:"); 414 415 /* start a new extent */ 416 ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); 417 ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 418 ze->ze_nblks = 1; 419 list_insert_tail(&ma->ma_zv->zv_extents, ze); 420 return (0); 421} 422 423static void 424zvol_free_extents(zvol_state_t *zv) 425{ 426 zvol_extent_t *ze; 427 428 while (ze = list_head(&zv->zv_extents)) { 429 list_remove(&zv->zv_extents, ze); 430 kmem_free(ze, sizeof (zvol_extent_t)); 431 } 432} 433 434static int 435zvol_get_lbas(zvol_state_t *zv) 436{ 437 objset_t *os = zv->zv_objset; 438 struct maparg ma; 439 int err; 440 441 ma.ma_zv = zv; 442 ma.ma_blks = 0; 443 zvol_free_extents(zv); 444 445 /* commit any in-flight changes before traversing the dataset */ 446 txg_wait_synced(dmu_objset_pool(os), 0); 447 err = traverse_dataset(dmu_objset_ds(os), 0, 448 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); 449 if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { 450 zvol_free_extents(zv); 451 return (err ? err : EIO); 452 } 453 454 return (0); 455} 456 457/* ARGSUSED */ 458void 459zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 460{ 461 zfs_creat_t *zct = arg; 462 nvlist_t *nvprops = zct->zct_props; 463 int error; 464 uint64_t volblocksize, volsize; 465 466 VERIFY(nvlist_lookup_uint64(nvprops, 467 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 468 if (nvlist_lookup_uint64(nvprops, 469 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 470 volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 471 472 /* 473 * These properties must be removed from the list so the generic 474 * property setting step won't apply to them. 
475 */ 476 VERIFY(nvlist_remove_all(nvprops, 477 zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 478 (void) nvlist_remove_all(nvprops, 479 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 480 481 error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 482 DMU_OT_NONE, 0, tx); 483 ASSERT(error == 0); 484 485 error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 486 DMU_OT_NONE, 0, tx); 487 ASSERT(error == 0); 488 489 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 490 ASSERT(error == 0); 491} 492 493/* 494 * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we 495 * implement DKIOCFREE/free-long-range. 496 */ 497static int 498zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap) 499{ 500 uint64_t offset, length; 501 502 if (byteswap) 503 byteswap_uint64_array(lr, sizeof (*lr)); 504 505 offset = lr->lr_offset; 506 length = lr->lr_length; 507 508 return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length)); 509} 510 511/* 512 * Replay a TX_WRITE ZIL transaction that didn't get committed 513 * after a system failure 514 */ 515static int 516zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 517{ 518 objset_t *os = zv->zv_objset; 519 char *data = (char *)(lr + 1); /* data follows lr_write_t */ 520 uint64_t offset, length; 521 dmu_tx_t *tx; 522 int error; 523 524 if (byteswap) 525 byteswap_uint64_array(lr, sizeof (*lr)); 526 527 offset = lr->lr_offset; 528 length = lr->lr_length; 529 530 /* If it's a dmu_sync() block, write the whole block */ 531 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 532 uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 533 if (length < blocksize) { 534 offset -= offset % blocksize; 535 length = blocksize; 536 } 537 } 538 539 tx = dmu_tx_create(os); 540 dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); 541 error = dmu_tx_assign(tx, TXG_WAIT); 542 if (error) { 543 dmu_tx_abort(tx); 544 } else { 545 dmu_write(os, ZVOL_OBJ, offset, length, data, tx); 
546 dmu_tx_commit(tx); 547 } 548 549 return (error); 550} 551 552/* ARGSUSED */ 553static int 554zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 555{ 556 return (SET_ERROR(ENOTSUP)); 557} 558 559/* 560 * Callback vectors for replaying records. 561 * Only TX_WRITE and TX_TRUNCATE are needed for zvol. 562 */ 563zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 564 zvol_replay_err, /* 0 no such transaction type */ 565 zvol_replay_err, /* TX_CREATE */ 566 zvol_replay_err, /* TX_MKDIR */ 567 zvol_replay_err, /* TX_MKXATTR */ 568 zvol_replay_err, /* TX_SYMLINK */ 569 zvol_replay_err, /* TX_REMOVE */ 570 zvol_replay_err, /* TX_RMDIR */ 571 zvol_replay_err, /* TX_LINK */ 572 zvol_replay_err, /* TX_RENAME */ 573 zvol_replay_write, /* TX_WRITE */ 574 zvol_replay_truncate, /* TX_TRUNCATE */ 575 zvol_replay_err, /* TX_SETATTR */ 576 zvol_replay_err, /* TX_ACL */ 577 zvol_replay_err, /* TX_CREATE_ACL */ 578 zvol_replay_err, /* TX_CREATE_ATTR */ 579 zvol_replay_err, /* TX_CREATE_ACL_ATTR */ 580 zvol_replay_err, /* TX_MKDIR_ACL */ 581 zvol_replay_err, /* TX_MKDIR_ATTR */ 582 zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ 583 zvol_replay_err, /* TX_WRITE2 */ 584}; 585 586#ifdef illumos 587int 588zvol_name2minor(const char *name, minor_t *minor) 589{ 590 zvol_state_t *zv; 591 592 mutex_enter(&zfsdev_state_lock); 593 zv = zvol_minor_lookup(name); 594 if (minor && zv) 595 *minor = zv->zv_minor; 596 mutex_exit(&zfsdev_state_lock); 597 return (zv ? 0 : -1); 598} 599#endif /* illumos */ 600 601/* 602 * Create a minor node (plus a whole lot more) for the specified volume. 
603 */ 604int 605zvol_create_minor(const char *name) 606{ 607 zfs_soft_state_t *zs; 608 zvol_state_t *zv; 609 objset_t *os; 610#ifdef illumos 611 dmu_object_info_t doi; 612 minor_t minor = 0; 613 char chrbuf[30], blkbuf[30]; 614#else 615 struct g_provider *pp; 616 struct g_geom *gp; 617 uint64_t mode; 618#endif 619 int error; 620 621#ifndef illumos 622 ZFS_LOG(1, "Creating ZVOL %s...", name); 623#endif 624 625 mutex_enter(&zfsdev_state_lock); 626 627 if (zvol_minor_lookup(name) != NULL) { 628 mutex_exit(&zfsdev_state_lock); 629 return (SET_ERROR(EEXIST)); 630 } 631 632 /* lie and say we're read-only */ 633 error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); 634 635 if (error) { 636 mutex_exit(&zfsdev_state_lock); 637 return (error); 638 } 639 640#ifdef illumos 641 if ((minor = zfsdev_minor_alloc()) == 0) { 642 dmu_objset_disown(os, FTAG); 643 mutex_exit(&zfsdev_state_lock); 644 return (SET_ERROR(ENXIO)); 645 } 646 647 if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { 648 dmu_objset_disown(os, FTAG); 649 mutex_exit(&zfsdev_state_lock); 650 return (SET_ERROR(EAGAIN)); 651 } 652 (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 653 (char *)name); 654 655 (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); 656 657 if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 658 minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 659 ddi_soft_state_free(zfsdev_state, minor); 660 dmu_objset_disown(os, FTAG); 661 mutex_exit(&zfsdev_state_lock); 662 return (SET_ERROR(EAGAIN)); 663 } 664 665 (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); 666 667 if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 668 minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 669 ddi_remove_minor_node(zfs_dip, chrbuf); 670 ddi_soft_state_free(zfsdev_state, minor); 671 dmu_objset_disown(os, FTAG); 672 mutex_exit(&zfsdev_state_lock); 673 return (SET_ERROR(EAGAIN)); 674 } 675 676 zs = ddi_get_soft_state(zfsdev_state, minor); 677 zs->zss_type = ZSST_ZVOL; 678 zv = zs->zss_data = 
kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); 679#else /* !illumos */ 680 681 zv = kmem_zalloc(sizeof(*zv), KM_SLEEP); 682 zv->zv_state = 0; 683 error = dsl_prop_get_integer(name, 684 zfs_prop_to_name(ZFS_PROP_VOLMODE), &mode, NULL); 685 if (error != 0 || mode == ZFS_VOLMODE_DEFAULT) 686 mode = volmode; 687 688 DROP_GIANT(); 689 zv->zv_volmode = mode; 690 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 691 g_topology_lock(); 692 gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name); 693 gp->start = zvol_geom_start; 694 gp->access = zvol_geom_access; 695 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name); 696 pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 697 pp->sectorsize = DEV_BSIZE; 698 pp->mediasize = 0; 699 pp->private = zv; 700 701 zv->zv_provider = pp; 702 bioq_init(&zv->zv_queue); 703 mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF); 704 } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 705 struct make_dev_args args; 706 707 make_dev_args_init(&args); 708 args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 709 args.mda_devsw = &zvol_cdevsw; 710 args.mda_cr = NULL; 711 args.mda_uid = UID_ROOT; 712 args.mda_gid = GID_OPERATOR; 713 args.mda_mode = 0640; 714 args.mda_si_drv2 = zv; 715 error = make_dev_s(&args, &zv->zv_dev, 716 "%s/%s", ZVOL_DRIVER, name); 717 if (error != 0) { 718 kmem_free(zv, sizeof(*zv)); 719 dmu_objset_disown(os, FTAG); 720 mutex_exit(&zfsdev_state_lock); 721 return (error); 722 } 723 zv->zv_dev->si_iosize_max = MAXPHYS; 724 } 725 LIST_INSERT_HEAD(&all_zvols, zv, zv_links); 726#endif /* illumos */ 727 728 (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 729 zv->zv_min_bs = DEV_BSHIFT; 730#ifdef illumos 731 zv->zv_minor = minor; 732#endif 733 zv->zv_objset = os; 734 if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) 735 zv->zv_flags |= ZVOL_RDONLY; 736 mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 737 avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 738 sizeof (rl_t), offsetof(rl_t, 
r_node)); 739 list_create(&zv->zv_extents, sizeof (zvol_extent_t), 740 offsetof(zvol_extent_t, ze_node)); 741#ifdef illumos 742 /* get and cache the blocksize */ 743 error = dmu_object_info(os, ZVOL_OBJ, &doi); 744 ASSERT(error == 0); 745 zv->zv_volblocksize = doi.doi_data_block_size; 746#endif 747 748 if (spa_writeable(dmu_objset_spa(os))) { 749 if (zil_replay_disable) 750 zil_destroy(dmu_objset_zil(os), B_FALSE); 751 else 752 zil_replay(os, zv, zvol_replay_vector); 753 } 754 dmu_objset_disown(os, FTAG); 755 zv->zv_objset = NULL; 756 757 zvol_minors++; 758 759 mutex_exit(&zfsdev_state_lock); 760#ifndef illumos 761 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 762 zvol_geom_run(zv); 763 g_topology_unlock(); 764 } 765 PICKUP_GIANT(); 766 767 ZFS_LOG(1, "ZVOL %s created.", name); 768#endif 769 770 return (0); 771} 772 773/* 774 * Remove minor node for the specified volume. 775 */ 776static int 777zvol_remove_zv(zvol_state_t *zv) 778{ 779#ifdef illumos 780 char nmbuf[20]; 781 minor_t minor = zv->zv_minor; 782#endif 783 784 ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 785 if (zv->zv_total_opens != 0) 786 return (SET_ERROR(EBUSY)); 787 788#ifdef illumos 789 (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor); 790 ddi_remove_minor_node(zfs_dip, nmbuf); 791 792 (void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor); 793 ddi_remove_minor_node(zfs_dip, nmbuf); 794#else 795 ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); 796 797 LIST_REMOVE(zv, zv_links); 798 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 799 g_topology_lock(); 800 zvol_geom_destroy(zv); 801 g_topology_unlock(); 802 } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 803 if (zv->zv_dev != NULL) 804 destroy_dev(zv->zv_dev); 805 } 806#endif 807 808 avl_destroy(&zv->zv_znode.z_range_avl); 809 mutex_destroy(&zv->zv_znode.z_range_lock); 810 811 kmem_free(zv, sizeof (zvol_state_t)); 812#ifdef illumos 813 ddi_soft_state_free(zfsdev_state, minor); 814#endif 815 zvol_minors--; 816 return (0); 817} 818 819int 820zvol_remove_minor(const 
char *name) 821{ 822 zvol_state_t *zv; 823 int rc; 824 825 mutex_enter(&zfsdev_state_lock); 826 if ((zv = zvol_minor_lookup(name)) == NULL) { 827 mutex_exit(&zfsdev_state_lock); 828 return (SET_ERROR(ENXIO)); 829 } 830 rc = zvol_remove_zv(zv); 831 mutex_exit(&zfsdev_state_lock); 832 return (rc); 833} 834 835int 836zvol_first_open(zvol_state_t *zv) 837{ 838 dmu_object_info_t doi; 839 objset_t *os; 840 uint64_t volsize; 841 int error; 842 uint64_t readonly; 843 844 /* lie and say we're read-only */ 845 error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, 846 zvol_tag, &os); 847 if (error) 848 return (error); 849 850 zv->zv_objset = os; 851 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 852 if (error) { 853 ASSERT(error == 0); 854 dmu_objset_disown(os, zvol_tag); 855 return (error); 856 } 857 858 /* get and cache the blocksize */ 859 error = dmu_object_info(os, ZVOL_OBJ, &doi); 860 if (error) { 861 ASSERT(error == 0); 862 dmu_objset_disown(os, zvol_tag); 863 return (error); 864 } 865 zv->zv_volblocksize = doi.doi_data_block_size; 866 867 error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); 868 if (error) { 869 dmu_objset_disown(os, zvol_tag); 870 return (error); 871 } 872 873 zvol_size_changed(zv, volsize); 874 zv->zv_zilog = zil_open(os, zvol_get_data); 875 876 VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly, 877 NULL) == 0); 878 if (readonly || dmu_objset_is_snapshot(os) || 879 !spa_writeable(dmu_objset_spa(os))) 880 zv->zv_flags |= ZVOL_RDONLY; 881 else 882 zv->zv_flags &= ~ZVOL_RDONLY; 883 return (error); 884} 885 886void 887zvol_last_close(zvol_state_t *zv) 888{ 889 zil_close(zv->zv_zilog); 890 zv->zv_zilog = NULL; 891 892 dmu_buf_rele(zv->zv_dbuf, zvol_tag); 893 zv->zv_dbuf = NULL; 894 895 /* 896 * Evict cached data 897 */ 898 if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && 899 !(zv->zv_flags & ZVOL_RDONLY)) 900 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 901 dmu_objset_evict_dbufs(zv->zv_objset); 902 
903 dmu_objset_disown(zv->zv_objset, zvol_tag); 904 zv->zv_objset = NULL; 905} 906 907#ifdef illumos 908int 909zvol_prealloc(zvol_state_t *zv) 910{ 911 objset_t *os = zv->zv_objset; 912 dmu_tx_t *tx; 913 uint64_t refd, avail, usedobjs, availobjs; 914 uint64_t resid = zv->zv_volsize; 915 uint64_t off = 0; 916 917 /* Check the space usage before attempting to allocate the space */ 918 dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); 919 if (avail < zv->zv_volsize) 920 return (SET_ERROR(ENOSPC)); 921 922 /* Free old extents if they exist */ 923 zvol_free_extents(zv); 924 925 while (resid != 0) { 926 int error; 927 uint64_t bytes = MIN(resid, SPA_OLD_MAXBLOCKSIZE); 928 929 tx = dmu_tx_create(os); 930 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 931 error = dmu_tx_assign(tx, TXG_WAIT); 932 if (error) { 933 dmu_tx_abort(tx); 934 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); 935 return (error); 936 } 937 dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); 938 dmu_tx_commit(tx); 939 off += bytes; 940 resid -= bytes; 941 } 942 txg_wait_synced(dmu_objset_pool(os), 0); 943 944 return (0); 945} 946#endif /* illumos */ 947 948static int 949zvol_update_volsize(objset_t *os, uint64_t volsize) 950{ 951 dmu_tx_t *tx; 952 int error; 953 954 ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 955 956 tx = dmu_tx_create(os); 957 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 958 dmu_tx_mark_netfree(tx); 959 error = dmu_tx_assign(tx, TXG_WAIT); 960 if (error) { 961 dmu_tx_abort(tx); 962 return (error); 963 } 964 965 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, 966 &volsize, tx); 967 dmu_tx_commit(tx); 968 969 if (error == 0) 970 error = dmu_free_long_range(os, 971 ZVOL_OBJ, volsize, DMU_OBJECT_END); 972 return (error); 973} 974 975void 976zvol_remove_minors(const char *name) 977{ 978#ifdef illumos 979 zvol_state_t *zv; 980 char *namebuf; 981 minor_t minor; 982 983 namebuf = kmem_zalloc(strlen(name) + 2, KM_SLEEP); 984 (void) strncpy(namebuf, name, strlen(name)); 985 (void) 
strcat(namebuf, "/"); 986 mutex_enter(&zfsdev_state_lock); 987 for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { 988 989 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 990 if (zv == NULL) 991 continue; 992 if (strncmp(namebuf, zv->zv_name, strlen(namebuf)) == 0) 993 (void) zvol_remove_zv(zv); 994 } 995 kmem_free(namebuf, strlen(name) + 2); 996 997 mutex_exit(&zfsdev_state_lock); 998#else /* !illumos */ 999 zvol_state_t *zv, *tzv; 1000 size_t namelen; 1001 1002 namelen = strlen(name); 1003 1004 DROP_GIANT(); 1005 mutex_enter(&zfsdev_state_lock); 1006 1007 LIST_FOREACH_SAFE(zv, &all_zvols, zv_links, tzv) { 1008 if (strcmp(zv->zv_name, name) == 0 || 1009 (strncmp(zv->zv_name, name, namelen) == 0 && 1010 strlen(zv->zv_name) > namelen && (zv->zv_name[namelen] == '/' || 1011 zv->zv_name[namelen] == '@'))) { 1012 (void) zvol_remove_zv(zv); 1013 } 1014 } 1015 1016 mutex_exit(&zfsdev_state_lock); 1017 PICKUP_GIANT(); 1018#endif /* illumos */ 1019} 1020 1021static int 1022zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize) 1023{ 1024 uint64_t old_volsize = 0ULL; 1025 int error = 0; 1026 1027 ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 1028 1029 /* 1030 * Reinitialize the dump area to the new size. If we 1031 * failed to resize the dump area then restore it back to 1032 * its original size. We must set the new volsize prior 1033 * to calling dumpvp_resize() to ensure that the devices' 1034 * size(9P) is not visible by the dump subsystem. 1035 */ 1036 old_volsize = zv->zv_volsize; 1037 zvol_size_changed(zv, volsize); 1038 1039#ifdef ZVOL_DUMP 1040 if (zv->zv_flags & ZVOL_DUMPIFIED) { 1041 if ((error = zvol_dumpify(zv)) != 0 || 1042 (error = dumpvp_resize()) != 0) { 1043 int dumpify_error; 1044 1045 (void) zvol_update_volsize(zv->zv_objset, old_volsize); 1046 zvol_size_changed(zv, old_volsize); 1047 dumpify_error = zvol_dumpify(zv); 1048 error = dumpify_error ? 
		    dumpify_error : error;
	}
}
#endif	/* ZVOL_DUMP */

#ifdef illumos
	/*
	 * Generate a LUN expansion event.
	 */
	if (error == 0) {
		sysevent_id_t eid;
		nvlist_t *attr;
		char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

		(void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV,
		    zv->zv_minor);

		VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

		(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
		    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

		nvlist_free(attr);
		kmem_free(physpath, MAXPATHLEN);
	}
#endif	/* illumos */
	return (error);
}

/*
 * Set the size of the volume named 'name' to 'volsize' bytes.
 *
 * Fails with EROFS if the dataset's readonly property is set.  If no
 * minor/open state exists for the volume, the objset is temporarily
 * owned for the duration of the update and disowned again on the way
 * out.  Returns 0 on success or an errno on failure.
 */
int
zvol_set_volsize(const char *name, uint64_t volsize)
{
	zvol_state_t *zv = NULL;
	objset_t *os;
	int error;
	dmu_object_info_t doi;
	uint64_t readonly;
	boolean_t owned = B_FALSE;	/* did we dmu_objset_own() here? */

	error = dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
	if (error != 0)
		return (error);
	if (readonly)
		return (SET_ERROR(EROFS));

	mutex_enter(&zfsdev_state_lock);
	zv = zvol_minor_lookup(name);

	if (zv == NULL || zv->zv_objset == NULL) {
		/* No live objset; own it ourselves for the update. */
		if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
		    FTAG, &os)) != 0) {
			mutex_exit(&zfsdev_state_lock);
			return (error);
		}
		owned = B_TRUE;
		if (zv != NULL)
			zv->zv_objset = os;
	} else {
		os = zv->zv_objset;
	}

	/* Validate the new size against the volume's block size. */
	if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 ||
	    (error = zvol_check_volsize(volsize, doi.doi_data_block_size)) != 0)
		goto out;

	error = zvol_update_volsize(os, volsize);

	/* If the volume has live (open) state, update that too. */
	if (error == 0 && zv != NULL)
		error = zvol_update_live_volsize(zv, volsize);
out:
	if (owned) {
		dmu_objset_disown(os, FTAG);
		if (zv != NULL)
			zv->zv_objset = NULL;
	}

	mutex_exit(&zfsdev_state_lock);
	return (error);
}

/*
 * Open the volume: illumos path opens by minor number; FreeBSD path is
 * the GEOM provider access routine, opening 'count' references at once.
 */
/*ARGSUSED*/
#ifdef illumos
int
zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr)
#else
static int
zvol_open(struct g_provider *pp, int flag, int count)
#endif
{
	zvol_state_t *zv;
	int err = 0;
#ifdef illumos

	mutex_enter(&zfsdev_state_lock);

	zv = zfsdev_get_soft_state(getminor(*devp), ZSST_ZVOL);
	if (zv == NULL) {
		mutex_exit(&zfsdev_state_lock);
		return (SET_ERROR(ENXIO));
	}

	if (zv->zv_total_opens == 0)
		err = zvol_first_open(zv);
	if (err) {
		mutex_exit(&zfsdev_state_lock);
		return (err);
	}
#else	/* !illumos */
	boolean_t locked = B_FALSE;

	if (!zpool_on_zvol && tsd_get(zfs_geom_probe_vdev_key) != NULL) {
		/*
		 * if zfs_geom_probe_vdev_key is set, that means that zfs is
		 * attempting to probe geom providers while looking for a
		 * replacement for a missing VDEV. In this case, the
		 * spa_namespace_lock will not be held, but it is still illegal
		 * to use a zvol as a vdev. Deadlocks can result if another
		 * thread has spa_namespace_lock
		 */
		return (EOPNOTSUPP);
	}
	/*
	 * Protect against recursively entering spa_namespace_lock
	 * when spa_open() is used for a pool on a (local) ZVOL(s).
	 * This is needed since we replaced upstream zfsdev_state_lock
	 * with spa_namespace_lock in the ZVOL code.
	 * We are using the same trick as spa_open().
	 * Note that calls in zvol_first_open which need to resolve
	 * pool name to a spa object will enter spa_open()
	 * recursively, but that function already has all the
	 * necessary protection.
	 */
	if (!MUTEX_HELD(&zfsdev_state_lock)) {
		mutex_enter(&zfsdev_state_lock);
		locked = B_TRUE;
	}

	zv = pp->private;
	if (zv == NULL) {
		if (locked)
			mutex_exit(&zfsdev_state_lock);
		return (SET_ERROR(ENXIO));
	}

	if (zv->zv_total_opens == 0) {
		err = zvol_first_open(zv);
		if (err) {
			if (locked)
				mutex_exit(&zfsdev_state_lock);
			return (err);
		}
		/* Publish geometry to GEOM on first open. */
		pp->mediasize = zv->zv_volsize;
		pp->stripeoffset = 0;
		pp->stripesize = zv->zv_volblocksize;
	}
#endif	/* illumos */
	if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
		err = SET_ERROR(EROFS);
		goto out;
	}
	if (zv->zv_flags & ZVOL_EXCL) {
		err = SET_ERROR(EBUSY);
		goto out;
	}
#ifdef FEXCL
	if (flag & FEXCL) {
		/* Exclusive open: refuse if anyone else already has it. */
		if (zv->zv_total_opens != 0) {
			err = SET_ERROR(EBUSY);
			goto out;
		}
		zv->zv_flags |= ZVOL_EXCL;
	}
#endif

#ifdef illumos
	if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) {
		zv->zv_open_count[otyp]++;
		zv->zv_total_opens++;
	}
	mutex_exit(&zfsdev_state_lock);
#else
	zv->zv_total_opens += count;
	if (locked)
		mutex_exit(&zfsdev_state_lock);
#endif

	return (err);
out:
	/* Undo zvol_first_open() if this open failed and was the first. */
	if (zv->zv_total_opens == 0)
		zvol_last_close(zv);
#ifdef illumos
	mutex_exit(&zfsdev_state_lock);
#else
	if (locked)
		mutex_exit(&zfsdev_state_lock);
#endif
	return (err);
}

/*
 * Close the volume; drops 'count' references on FreeBSD, one open type
 * on illumos.  Tears down open state via zvol_last_close() when the
 * reference count reaches zero.
 */
/*ARGSUSED*/
#ifdef illumos
int
zvol_close(dev_t dev, int flag, int otyp, cred_t *cr)
{
	minor_t minor = getminor(dev);
	zvol_state_t *zv;
	int error = 0;

	mutex_enter(&zfsdev_state_lock);

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL) {
		mutex_exit(&zfsdev_state_lock);
#else	/* !illumos */
static int
zvol_close(struct g_provider *pp, int flag, int count)
{
	zvol_state_t *zv;
	int error = 0;

	boolean_t locked = B_FALSE;

	/* See comment in zvol_open(). */
	if (!MUTEX_HELD(&zfsdev_state_lock)) {
		mutex_enter(&zfsdev_state_lock);
		locked = B_TRUE;
	}

	zv = pp->private;
	if (zv == NULL) {
		if (locked)
			mutex_exit(&zfsdev_state_lock);
#endif	/* illumos */
		return (SET_ERROR(ENXIO));
	}

	if (zv->zv_flags & ZVOL_EXCL) {
		ASSERT(zv->zv_total_opens == 1);
		zv->zv_flags &= ~ZVOL_EXCL;
	}

	/*
	 * If the open count is zero, this is a spurious close.
	 * That indicates a bug in the kernel / DDI framework.
	 */
#ifdef illumos
	ASSERT(zv->zv_open_count[otyp] != 0);
#endif
	ASSERT(zv->zv_total_opens != 0);

	/*
	 * You may get multiple opens, but only one close.
	 */
#ifdef illumos
	zv->zv_open_count[otyp]--;
	zv->zv_total_opens--;
#else
	zv->zv_total_opens -= count;
#endif

	if (zv->zv_total_opens == 0)
		zvol_last_close(zv);

#ifdef illumos
	mutex_exit(&zfsdev_state_lock);
#else
	if (locked)
		mutex_exit(&zfsdev_state_lock);
#endif
	return (error);
}

/*
 * dmu_sync() / zvol_get_data() completion callback: release the dbuf
 * and range lock, record the written block in the lwb on success, and
 * free the zgd.
 */
static void
zvol_get_done(zgd_t *zgd, int error)
{
	if (zgd->zgd_db)
		dmu_buf_rele(zgd->zgd_db, zgd);

	zfs_range_unlock(zgd->zgd_rl);

	if (error == 0 && zgd->zgd_bp)
		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);

	kmem_free(zgd, sizeof (zgd_t));
}

/*
 * Get data to generate a TX_WRITE intent log record.
1336 */ 1337static int 1338zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) 1339{ 1340 zvol_state_t *zv = arg; 1341 objset_t *os = zv->zv_objset; 1342 uint64_t object = ZVOL_OBJ; 1343 uint64_t offset = lr->lr_offset; 1344 uint64_t size = lr->lr_length; /* length of user data */ 1345 dmu_buf_t *db; 1346 zgd_t *zgd; 1347 int error; 1348 1349 ASSERT3P(lwb, !=, NULL); 1350 ASSERT3P(zio, !=, NULL); 1351 ASSERT3U(size, !=, 0); 1352 1353 zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1354 zgd->zgd_lwb = lwb; 1355 1356 /* 1357 * Write records come in two flavors: immediate and indirect. 1358 * For small writes it's cheaper to store the data with the 1359 * log record (immediate); for large writes it's cheaper to 1360 * sync the data and get a pointer to it (indirect) so that 1361 * we don't have to write the data twice. 1362 */ 1363 if (buf != NULL) { /* immediate write */ 1364 zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, 1365 RL_READER); 1366 error = dmu_read(os, object, offset, size, buf, 1367 DMU_READ_NO_PREFETCH); 1368 } else { /* indirect write */ 1369 /* 1370 * Have to lock the whole block to ensure when it's written out 1371 * and its checksum is being calculated that no one can change 1372 * the data. Contrarily to zfs_get_data we need not re-check 1373 * blocksize after we get the lock because it cannot be changed. 
1374 */ 1375 size = zv->zv_volblocksize; 1376 offset = P2ALIGN(offset, size); 1377 zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, 1378 RL_READER); 1379 error = dmu_buf_hold(os, object, offset, zgd, &db, 1380 DMU_READ_NO_PREFETCH); 1381 if (error == 0) { 1382 blkptr_t *bp = &lr->lr_blkptr; 1383 1384 zgd->zgd_db = db; 1385 zgd->zgd_bp = bp; 1386 1387 ASSERT(db->db_offset == offset); 1388 ASSERT(db->db_size == size); 1389 1390 error = dmu_sync(zio, lr->lr_common.lrc_txg, 1391 zvol_get_done, zgd); 1392 1393 if (error == 0) 1394 return (0); 1395 } 1396 } 1397 1398 zvol_get_done(zgd, error); 1399 1400 return (error); 1401} 1402 1403/* 1404 * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. 1405 * 1406 * We store data in the log buffers if it's small enough. 1407 * Otherwise we will later flush the data out via dmu_sync(). 1408 */ 1409ssize_t zvol_immediate_write_sz = 32768; 1410#ifdef _KERNEL 1411SYSCTL_LONG(_vfs_zfs_vol, OID_AUTO, immediate_write_sz, CTLFLAG_RWTUN, 1412 &zvol_immediate_write_sz, 0, "Minimal size for indirect log write"); 1413#endif 1414 1415static void 1416zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, 1417 boolean_t sync) 1418{ 1419 uint32_t blocksize = zv->zv_volblocksize; 1420 zilog_t *zilog = zv->zv_zilog; 1421 itx_wr_state_t write_state; 1422 1423 if (zil_replaying(zilog, tx)) 1424 return; 1425 1426 if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) 1427 write_state = WR_INDIRECT; 1428 else if (!spa_has_slogs(zilog->zl_spa) && 1429 resid >= blocksize && blocksize > zvol_immediate_write_sz) 1430 write_state = WR_INDIRECT; 1431 else if (sync) 1432 write_state = WR_COPIED; 1433 else 1434 write_state = WR_NEED_COPY; 1435 1436 while (resid) { 1437 itx_t *itx; 1438 lr_write_t *lr; 1439 itx_wr_state_t wr_state = write_state; 1440 ssize_t len = resid; 1441 1442 if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA) 1443 wr_state = WR_NEED_COPY; 1444 else if (wr_state == WR_INDIRECT) 1445 
len = MIN(blocksize - P2PHASE(off, blocksize), resid); 1446 1447 itx = zil_itx_create(TX_WRITE, sizeof (*lr) + 1448 (wr_state == WR_COPIED ? len : 0)); 1449 lr = (lr_write_t *)&itx->itx_lr; 1450 if (wr_state == WR_COPIED && dmu_read(zv->zv_objset, 1451 ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { 1452 zil_itx_destroy(itx); 1453 itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1454 lr = (lr_write_t *)&itx->itx_lr; 1455 wr_state = WR_NEED_COPY; 1456 } 1457 1458 itx->itx_wr_state = wr_state; 1459 lr->lr_foid = ZVOL_OBJ; 1460 lr->lr_offset = off; 1461 lr->lr_length = len; 1462 lr->lr_blkoff = 0; 1463 BP_ZERO(&lr->lr_blkptr); 1464 1465 itx->itx_private = zv; 1466 1467 if (!sync && (zv->zv_sync_cnt == 0)) 1468 itx->itx_sync = B_FALSE; 1469 1470 zil_itx_assign(zilog, itx, tx); 1471 1472 off += len; 1473 resid -= len; 1474 } 1475} 1476 1477#ifdef illumos 1478static int 1479zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, 1480 uint64_t size, boolean_t doread, boolean_t isdump) 1481{ 1482 vdev_disk_t *dvd; 1483 int c; 1484 int numerrors = 0; 1485 1486 if (vd->vdev_ops == &vdev_mirror_ops || 1487 vd->vdev_ops == &vdev_replacing_ops || 1488 vd->vdev_ops == &vdev_spare_ops) { 1489 for (c = 0; c < vd->vdev_children; c++) { 1490 int err = zvol_dumpio_vdev(vd->vdev_child[c], 1491 addr, offset, origoffset, size, doread, isdump); 1492 if (err != 0) { 1493 numerrors++; 1494 } else if (doread) { 1495 break; 1496 } 1497 } 1498 } 1499 1500 if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops) 1501 return (numerrors < vd->vdev_children ? 
0 : EIO); 1502 1503 if (doread && !vdev_readable(vd)) 1504 return (SET_ERROR(EIO)); 1505 else if (!doread && !vdev_writeable(vd)) 1506 return (SET_ERROR(EIO)); 1507 1508 if (vd->vdev_ops == &vdev_raidz_ops) { 1509 return (vdev_raidz_physio(vd, 1510 addr, size, offset, origoffset, doread, isdump)); 1511 } 1512 1513 offset += VDEV_LABEL_START_SIZE; 1514 1515 if (ddi_in_panic() || isdump) { 1516 ASSERT(!doread); 1517 if (doread) 1518 return (SET_ERROR(EIO)); 1519 dvd = vd->vdev_tsd; 1520 ASSERT3P(dvd, !=, NULL); 1521 return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 1522 lbtodb(size))); 1523 } else { 1524 dvd = vd->vdev_tsd; 1525 ASSERT3P(dvd, !=, NULL); 1526 return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size, 1527 offset, doread ? B_READ : B_WRITE)); 1528 } 1529} 1530 1531static int 1532zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, 1533 boolean_t doread, boolean_t isdump) 1534{ 1535 vdev_t *vd; 1536 int error; 1537 zvol_extent_t *ze; 1538 spa_t *spa = dmu_objset_spa(zv->zv_objset); 1539 1540 /* Must be sector aligned, and not stradle a block boundary. 
*/ 1541 if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || 1542 P2BOUNDARY(offset, size, zv->zv_volblocksize)) { 1543 return (SET_ERROR(EINVAL)); 1544 } 1545 ASSERT(size <= zv->zv_volblocksize); 1546 1547 /* Locate the extent this belongs to */ 1548 ze = list_head(&zv->zv_extents); 1549 while (offset >= ze->ze_nblks * zv->zv_volblocksize) { 1550 offset -= ze->ze_nblks * zv->zv_volblocksize; 1551 ze = list_next(&zv->zv_extents, ze); 1552 } 1553 1554 if (ze == NULL) 1555 return (SET_ERROR(EINVAL)); 1556 1557 if (!ddi_in_panic()) 1558 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 1559 1560 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); 1561 offset += DVA_GET_OFFSET(&ze->ze_dva); 1562 error = zvol_dumpio_vdev(vd, addr, offset, DVA_GET_OFFSET(&ze->ze_dva), 1563 size, doread, isdump); 1564 1565 if (!ddi_in_panic()) 1566 spa_config_exit(spa, SCL_STATE, FTAG); 1567 1568 return (error); 1569} 1570 1571int 1572zvol_strategy(buf_t *bp) 1573{ 1574 zfs_soft_state_t *zs = NULL; 1575#else /* !illumos */ 1576void 1577zvol_strategy(struct bio *bp) 1578{ 1579#endif /* illumos */ 1580 zvol_state_t *zv; 1581 uint64_t off, volsize; 1582 size_t resid; 1583 char *addr; 1584 objset_t *os; 1585 rl_t *rl; 1586 int error = 0; 1587#ifdef illumos 1588 boolean_t doread = bp->b_flags & B_READ; 1589#else 1590 boolean_t doread = 0; 1591#endif 1592 boolean_t is_dumpified; 1593 boolean_t sync; 1594 1595#ifdef illumos 1596 if (getminor(bp->b_edev) == 0) { 1597 error = SET_ERROR(EINVAL); 1598 } else { 1599 zs = ddi_get_soft_state(zfsdev_state, getminor(bp->b_edev)); 1600 if (zs == NULL) 1601 error = SET_ERROR(ENXIO); 1602 else if (zs->zss_type != ZSST_ZVOL) 1603 error = SET_ERROR(EINVAL); 1604 } 1605 1606 if (error) { 1607 bioerror(bp, error); 1608 biodone(bp); 1609 return (0); 1610 } 1611 1612 zv = zs->zss_data; 1613 1614 if (!(bp->b_flags & B_READ) && (zv->zv_flags & ZVOL_RDONLY)) { 1615 bioerror(bp, EROFS); 1616 biodone(bp); 1617 return (0); 1618 } 1619 1620 off = 
ldbtob(bp->b_blkno); 1621#else /* !illumos */ 1622 if (bp->bio_to) 1623 zv = bp->bio_to->private; 1624 else 1625 zv = bp->bio_dev->si_drv2; 1626 1627 if (zv == NULL) { 1628 error = SET_ERROR(ENXIO); 1629 goto out; 1630 } 1631 1632 if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) { 1633 error = SET_ERROR(EROFS); 1634 goto out; 1635 } 1636 1637 switch (bp->bio_cmd) { 1638 case BIO_FLUSH: 1639 goto sync; 1640 case BIO_READ: 1641 doread = 1; 1642 case BIO_WRITE: 1643 case BIO_DELETE: 1644 break; 1645 default: 1646 error = EOPNOTSUPP; 1647 goto out; 1648 } 1649 1650 off = bp->bio_offset; 1651#endif /* illumos */ 1652 volsize = zv->zv_volsize; 1653 1654 os = zv->zv_objset; 1655 ASSERT(os != NULL); 1656 1657#ifdef illumos 1658 bp_mapin(bp); 1659 addr = bp->b_un.b_addr; 1660 resid = bp->b_bcount; 1661 1662 if (resid > 0 && (off < 0 || off >= volsize)) { 1663 bioerror(bp, EIO); 1664 biodone(bp); 1665 return (0); 1666 } 1667 1668 is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED; 1669 sync = ((!(bp->b_flags & B_ASYNC) && 1670 !(zv->zv_flags & ZVOL_WCE)) || 1671 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) && 1672 !doread && !is_dumpified; 1673#else /* !illumos */ 1674 addr = bp->bio_data; 1675 resid = bp->bio_length; 1676 1677 if (resid > 0 && (off < 0 || off >= volsize)) { 1678 error = SET_ERROR(EIO); 1679 goto out; 1680 } 1681 1682 is_dumpified = B_FALSE; 1683 sync = !doread && !is_dumpified && 1684 zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; 1685#endif /* illumos */ 1686 1687 /* 1688 * There must be no buffer changes when doing a dmu_sync() because 1689 * we can't change the data whilst calculating the checksum. 1690 */ 1691 rl = zfs_range_lock(&zv->zv_znode, off, resid, 1692 doread ? 
RL_READER : RL_WRITER); 1693 1694#ifndef illumos 1695 if (bp->bio_cmd == BIO_DELETE) { 1696 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1697 error = dmu_tx_assign(tx, TXG_WAIT); 1698 if (error != 0) { 1699 dmu_tx_abort(tx); 1700 } else { 1701 zvol_log_truncate(zv, tx, off, resid, sync); 1702 dmu_tx_commit(tx); 1703 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 1704 off, resid); 1705 resid = 0; 1706 } 1707 goto unlock; 1708 } 1709#endif 1710 while (resid != 0 && off < volsize) { 1711 size_t size = MIN(resid, zvol_maxphys); 1712#ifdef illumos 1713 if (is_dumpified) { 1714 size = MIN(size, P2END(off, zv->zv_volblocksize) - off); 1715 error = zvol_dumpio(zv, addr, off, size, 1716 doread, B_FALSE); 1717 } else if (doread) { 1718#else 1719 if (doread) { 1720#endif 1721 error = dmu_read(os, ZVOL_OBJ, off, size, addr, 1722 DMU_READ_PREFETCH); 1723 } else { 1724 dmu_tx_t *tx = dmu_tx_create(os); 1725 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 1726 error = dmu_tx_assign(tx, TXG_WAIT); 1727 if (error) { 1728 dmu_tx_abort(tx); 1729 } else { 1730 dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 1731 zvol_log_write(zv, tx, off, size, sync); 1732 dmu_tx_commit(tx); 1733 } 1734 } 1735 if (error) { 1736 /* convert checksum errors into IO errors */ 1737 if (error == ECKSUM) 1738 error = SET_ERROR(EIO); 1739 break; 1740 } 1741 off += size; 1742 addr += size; 1743 resid -= size; 1744 } 1745#ifndef illumos 1746unlock: 1747#endif 1748 zfs_range_unlock(rl); 1749 1750#ifdef illumos 1751 if ((bp->b_resid = resid) == bp->b_bcount) 1752 bioerror(bp, off > volsize ? 
EINVAL : error); 1753 1754 if (sync) 1755 zil_commit(zv->zv_zilog, ZVOL_OBJ); 1756 biodone(bp); 1757 1758 return (0); 1759#else /* !illumos */ 1760 bp->bio_completed = bp->bio_length - resid; 1761 if (bp->bio_completed < bp->bio_length && off > volsize) 1762 error = EINVAL; 1763 1764 if (sync) { 1765sync: 1766 zil_commit(zv->zv_zilog, ZVOL_OBJ); 1767 } 1768out: 1769 if (bp->bio_to) 1770 g_io_deliver(bp, error); 1771 else 1772 biofinish(bp, NULL, error); 1773#endif /* illumos */ 1774} 1775 1776#ifdef illumos 1777/* 1778 * Set the buffer count to the zvol maximum transfer. 1779 * Using our own routine instead of the default minphys() 1780 * means that for larger writes we write bigger buffers on X86 1781 * (128K instead of 56K) and flush the disk write cache less often 1782 * (every zvol_maxphys - currently 1MB) instead of minphys (currently 1783 * 56K on X86 and 128K on sparc). 1784 */ 1785void 1786zvol_minphys(struct buf *bp) 1787{ 1788 if (bp->b_bcount > zvol_maxphys) 1789 bp->b_bcount = zvol_maxphys; 1790} 1791 1792int 1793zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) 1794{ 1795 minor_t minor = getminor(dev); 1796 zvol_state_t *zv; 1797 int error = 0; 1798 uint64_t size; 1799 uint64_t boff; 1800 uint64_t resid; 1801 1802 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1803 if (zv == NULL) 1804 return (SET_ERROR(ENXIO)); 1805 1806 if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0) 1807 return (SET_ERROR(EINVAL)); 1808 1809 boff = ldbtob(blkno); 1810 resid = ldbtob(nblocks); 1811 1812 VERIFY3U(boff + resid, <=, zv->zv_volsize); 1813 1814 while (resid) { 1815 size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); 1816 error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE); 1817 if (error) 1818 break; 1819 boff += size; 1820 addr += size; 1821 resid -= size; 1822 } 1823 1824 return (error); 1825} 1826 1827/*ARGSUSED*/ 1828int 1829zvol_read(dev_t dev, uio_t *uio, cred_t *cr) 1830{ 1831 minor_t minor = getminor(dev); 1832#else /* !illumos */ 1833int 
1834zvol_read(struct cdev *dev, struct uio *uio, int ioflag) 1835{ 1836#endif /* illumos */ 1837 zvol_state_t *zv; 1838 uint64_t volsize; 1839 rl_t *rl; 1840 int error = 0; 1841 1842#ifdef illumos 1843 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1844 if (zv == NULL) 1845 return (SET_ERROR(ENXIO)); 1846#else 1847 zv = dev->si_drv2; 1848#endif 1849 1850 volsize = zv->zv_volsize; 1851 /* uio_loffset == volsize isn't an error as its required for EOF processing. */ 1852 if (uio->uio_resid > 0 && 1853 (uio->uio_loffset < 0 || uio->uio_loffset > volsize)) 1854 return (SET_ERROR(EIO)); 1855 1856#ifdef illumos 1857 if (zv->zv_flags & ZVOL_DUMPIFIED) { 1858 error = physio(zvol_strategy, NULL, dev, B_READ, 1859 zvol_minphys, uio); 1860 return (error); 1861 } 1862#endif 1863 1864 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1865 RL_READER); 1866 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1867 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1868 1869 /* don't read past the end */ 1870 if (bytes > volsize - uio->uio_loffset) 1871 bytes = volsize - uio->uio_loffset; 1872 1873 error = dmu_read_uio_dbuf(zv->zv_dbuf, uio, bytes); 1874 if (error) { 1875 /* convert checksum errors into IO errors */ 1876 if (error == ECKSUM) 1877 error = SET_ERROR(EIO); 1878 break; 1879 } 1880 } 1881 zfs_range_unlock(rl); 1882 return (error); 1883} 1884 1885#ifdef illumos 1886/*ARGSUSED*/ 1887int 1888zvol_write(dev_t dev, uio_t *uio, cred_t *cr) 1889{ 1890 minor_t minor = getminor(dev); 1891#else /* !illumos */ 1892int 1893zvol_write(struct cdev *dev, struct uio *uio, int ioflag) 1894{ 1895#endif /* illumos */ 1896 zvol_state_t *zv; 1897 uint64_t volsize; 1898 rl_t *rl; 1899 int error = 0; 1900 boolean_t sync; 1901 1902#ifdef illumos 1903 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1904 if (zv == NULL) 1905 return (SET_ERROR(ENXIO)); 1906#else 1907 zv = dev->si_drv2; 1908#endif 1909 1910 volsize = zv->zv_volsize; 1911 /* uio_loffset == volsize 
isn't an error as its required for EOF processing. */ 1912 if (uio->uio_resid > 0 && 1913 (uio->uio_loffset < 0 || uio->uio_loffset > volsize)) 1914 return (SET_ERROR(EIO)); 1915 1916#ifdef illumos 1917 if (zv->zv_flags & ZVOL_DUMPIFIED) { 1918 error = physio(zvol_strategy, NULL, dev, B_WRITE, 1919 zvol_minphys, uio); 1920 return (error); 1921 } 1922 1923 sync = !(zv->zv_flags & ZVOL_WCE) || 1924#else 1925 sync = (ioflag & IO_SYNC) || 1926#endif 1927 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 1928 1929 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1930 RL_WRITER); 1931 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1932 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1933 uint64_t off = uio->uio_loffset; 1934 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1935 1936 if (bytes > volsize - off) /* don't write past the end */ 1937 bytes = volsize - off; 1938 1939 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 1940 error = dmu_tx_assign(tx, TXG_WAIT); 1941 if (error) { 1942 dmu_tx_abort(tx); 1943 break; 1944 } 1945 error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx); 1946 if (error == 0) 1947 zvol_log_write(zv, tx, off, bytes, sync); 1948 dmu_tx_commit(tx); 1949 1950 if (error) 1951 break; 1952 } 1953 zfs_range_unlock(rl); 1954 if (sync) 1955 zil_commit(zv->zv_zilog, ZVOL_OBJ); 1956 return (error); 1957} 1958 1959#ifdef illumos 1960int 1961zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) 1962{ 1963 struct uuid uuid = EFI_RESERVED; 1964 efi_gpe_t gpe = { 0 }; 1965 uint32_t crc; 1966 dk_efi_t efi; 1967 int length; 1968 char *ptr; 1969 1970 if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) 1971 return (SET_ERROR(EFAULT)); 1972 ptr = (char *)(uintptr_t)efi.dki_data_64; 1973 length = efi.dki_length; 1974 /* 1975 * Some clients may attempt to request a PMBR for the 1976 * zvol. Currently this interface will return EINVAL to 1977 * such requests. 
These requests could be supported by 1978 * adding a check for lba == 0 and consing up an appropriate 1979 * PMBR. 1980 */ 1981 if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) 1982 return (SET_ERROR(EINVAL)); 1983 1984 gpe.efi_gpe_StartingLBA = LE_64(34ULL); 1985 gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); 1986 UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); 1987 1988 if (efi.dki_lba == 1) { 1989 efi_gpt_t gpt = { 0 }; 1990 1991 gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1992 gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 1993 gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); 1994 gpt.efi_gpt_MyLBA = LE_64(1ULL); 1995 gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); 1996 gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1); 1997 gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1998 gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); 1999 gpt.efi_gpt_SizeOfPartitionEntry = 2000 LE_32(sizeof (efi_gpe_t)); 2001 CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); 2002 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 2003 CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); 2004 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); 2005 if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), 2006 flag)) 2007 return (SET_ERROR(EFAULT)); 2008 ptr += sizeof (gpt); 2009 length -= sizeof (gpt); 2010 } 2011 if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), 2012 length), flag)) 2013 return (SET_ERROR(EFAULT)); 2014 return (0); 2015} 2016 2017/* 2018 * BEGIN entry points to allow external callers access to the volume. 2019 */ 2020/* 2021 * Return the volume parameters needed for access from an external caller. 2022 * These values are invariant as long as the volume is held open. 
2023 */ 2024int 2025zvol_get_volume_params(minor_t minor, uint64_t *blksize, 2026 uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl, 2027 void **rl_hdl, void **bonus_hdl) 2028{ 2029 zvol_state_t *zv; 2030 2031 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 2032 if (zv == NULL) 2033 return (SET_ERROR(ENXIO)); 2034 if (zv->zv_flags & ZVOL_DUMPIFIED) 2035 return (SET_ERROR(ENXIO)); 2036 2037 ASSERT(blksize && max_xfer_len && minor_hdl && 2038 objset_hdl && zil_hdl && rl_hdl && bonus_hdl); 2039 2040 *blksize = zv->zv_volblocksize; 2041 *max_xfer_len = (uint64_t)zvol_maxphys; 2042 *minor_hdl = zv; 2043 *objset_hdl = zv->zv_objset; 2044 *zil_hdl = zv->zv_zilog; 2045 *rl_hdl = &zv->zv_znode; 2046 *bonus_hdl = zv->zv_dbuf; 2047 return (0); 2048} 2049 2050/* 2051 * Return the current volume size to an external caller. 2052 * The size can change while the volume is open. 2053 */ 2054uint64_t 2055zvol_get_volume_size(void *minor_hdl) 2056{ 2057 zvol_state_t *zv = minor_hdl; 2058 2059 return (zv->zv_volsize); 2060} 2061 2062/* 2063 * Return the current WCE setting to an external caller. 2064 * The WCE setting can change while the volume is open. 2065 */ 2066int 2067zvol_get_volume_wce(void *minor_hdl) 2068{ 2069 zvol_state_t *zv = minor_hdl; 2070 2071 return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0); 2072} 2073 2074/* 2075 * Entry point for external callers to zvol_log_write 2076 */ 2077void 2078zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid, 2079 boolean_t sync) 2080{ 2081 zvol_state_t *zv = minor_hdl; 2082 2083 zvol_log_write(zv, tx, off, resid, sync); 2084} 2085/* 2086 * END entry points to allow external callers access to the volume. 2087 */ 2088#endif /* illumos */ 2089 2090/* 2091 * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE. 
 */
static void
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
    boolean_t sync)
{
	itx_t *itx;
	lr_truncate_t *lr;
	zilog_t *zilog = zv->zv_zilog;

	if (zil_replaying(zilog, tx))
		return;

	itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
	lr = (lr_truncate_t *)&itx->itx_lr;
	lr->lr_foid = ZVOL_OBJ;
	lr->lr_offset = off;
	lr->lr_length = len;

	/* Force a synchronous itx if requested or any opener demands sync. */
	itx->itx_sync = (sync || zv->zv_sync_cnt != 0);
	zil_itx_assign(zilog, itx, tx);
}

#ifdef illumos
/*
 * Dirtbag ioctls to support mkfs(1M) for UFS filesystems.  See dkio(7I).
 * Also a dirtbag dkio ioctl for unmap/free-block functionality.
 */
/*ARGSUSED*/
int
zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
{
	zvol_state_t *zv;
	struct dk_callback *dkc;
	int error = 0;
	rl_t *rl;

	mutex_enter(&zfsdev_state_lock);

	zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);

	if (zv == NULL) {
		mutex_exit(&zfsdev_state_lock);
		return (SET_ERROR(ENXIO));
	}
	ASSERT(zv->zv_total_opens > 0);

	/*
	 * NOTE: each case manages zfsdev_state_lock itself -- cases that
	 * 'return' have already dropped it, cases that 'break' fall
	 * through to the mutex_exit() at the bottom.
	 */
	switch (cmd) {

	case DKIOCINFO:
	{
		struct dk_cinfo dki;

		bzero(&dki, sizeof (dki));
		(void) strcpy(dki.dki_cname, "zvol");
		(void) strcpy(dki.dki_dname, "zvol");
		dki.dki_ctype = DKC_UNKNOWN;
		dki.dki_unit = getminor(dev);
		dki.dki_maxtransfer =
		    1 << (SPA_OLD_MAXBLOCKSHIFT - zv->zv_min_bs);
		mutex_exit(&zfsdev_state_lock);
		if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
			error = SET_ERROR(EFAULT);
		return (error);
	}

	case DKIOCGMEDIAINFO:
	{
		struct dk_minfo dkm;

		bzero(&dkm, sizeof (dkm));
		dkm.dki_lbsize = 1U << zv->zv_min_bs;
		dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
		dkm.dki_media_type = DK_UNKNOWN;
		mutex_exit(&zfsdev_state_lock);
		if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag))
			error = SET_ERROR(EFAULT);
		return (error);
	}

	case DKIOCGMEDIAINFOEXT:
	{
		struct dk_minfo_ext dkmext;

		bzero(&dkmext, sizeof (dkmext));
		dkmext.dki_lbsize = 1U << zv->zv_min_bs;
		dkmext.dki_pbsize = zv->zv_volblocksize;
		dkmext.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
		dkmext.dki_media_type = DK_UNKNOWN;
		mutex_exit(&zfsdev_state_lock);
		if (ddi_copyout(&dkmext, (void *)arg, sizeof (dkmext), flag))
			error = SET_ERROR(EFAULT);
		return (error);
	}

	case DKIOCGETEFI:
	{
		uint64_t vs = zv->zv_volsize;
		uint8_t bs = zv->zv_min_bs;

		mutex_exit(&zfsdev_state_lock);
		error = zvol_getefi((void *)arg, flag, vs, bs);
		return (error);
	}

	case DKIOCFLUSHWRITECACHE:
		dkc = (struct dk_callback *)arg;
		mutex_exit(&zfsdev_state_lock);
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
		if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, error);
			error = 0;
		}
		return (error);

	case DKIOCGETWCE:
	{
		int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
		if (ddi_copyout(&wce, (void *)arg, sizeof (int),
		    flag))
			error = SET_ERROR(EFAULT);
		break;
	}
	case DKIOCSETWCE:
	{
		int wce;
		if (ddi_copyin((void *)arg, &wce, sizeof (int),
		    flag)) {
			error = SET_ERROR(EFAULT);
			break;
		}
		if (wce) {
			zv->zv_flags |= ZVOL_WCE;
			mutex_exit(&zfsdev_state_lock);
		} else {
			zv->zv_flags &= ~ZVOL_WCE;
			mutex_exit(&zfsdev_state_lock);
			/* Disabling WCE: flush anything still in the ZIL. */
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
		}
		return (0);
	}

	case DKIOCGGEOM:
	case DKIOCGVTOC:
		/*
		 * commands using these (like prtvtoc) expect ENOTSUP
		 * since we're emulating an EFI label
		 */
		error = SET_ERROR(ENOTSUP);
		break;

	case DKIOCDUMPINIT:
		rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
		    RL_WRITER);
		error = zvol_dumpify(zv);
		zfs_range_unlock(rl);
		break;

	case DKIOCDUMPFINI:
		if (!(zv->zv_flags & ZVOL_DUMPIFIED))
			break;
		rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
		    RL_WRITER);
		error = zvol_dump_fini(zv);
		zfs_range_unlock(rl);
		break;

	case DKIOCFREE:
	{
		dkioc_free_t df;
		dmu_tx_t *tx;

		if (!zvol_unmap_enabled)
			break;

		if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
			error = SET_ERROR(EFAULT);
			break;
		}

		/*
		 * Apply Postel's Law to length-checking.  If they overshoot,
		 * just blank out until the end, if there's a need to blank
		 * out anything.
		 */
		if (df.df_start >= zv->zv_volsize)
			break;	/* No need to do anything... */

		mutex_exit(&zfsdev_state_lock);

		rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
		    RL_WRITER);
		tx = dmu_tx_create(zv->zv_objset);
		dmu_tx_mark_netfree(tx);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error != 0) {
			dmu_tx_abort(tx);
		} else {
			zvol_log_truncate(zv, tx, df.df_start,
			    df.df_length, B_TRUE);
			dmu_tx_commit(tx);
			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
			    df.df_start, df.df_length);
		}

		zfs_range_unlock(rl);

		/*
		 * If the write-cache is disabled, 'sync' property
		 * is set to 'always', or if the caller is asking for
		 * a synchronous free, commit this operation to the zil.
		 * This will sync any previous uncommitted writes to the
		 * zvol object.
		 * Can be overridden by the zvol_unmap_sync_enabled tunable.
		 */
		if ((error == 0) && zvol_unmap_sync_enabled &&
		    (!(zv->zv_flags & ZVOL_WCE) ||
		    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
		    (df.df_flags & DF_WAIT_SYNC))) {
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
		}

		return (error);
	}

	default:
		error = SET_ERROR(ENOTTY);
		break;

	}
	mutex_exit(&zfsdev_state_lock);
	return (error);
}
#endif	/* illumos */

/*
 * Return non-zero if any zvol minors exist (module is in use).
 */
int
zvol_busy(void)
{
	return (zvol_minors != 0);
}

/*
 * Module initialization: set up the soft-state area (and, on illumos,
 * the global state lock).
 */
void
zvol_init(void)
{
	VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
	    1) == 0);
#ifdef illumos
	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
#else
	ZFS_LOG(1, "ZVOL Initialized.");
#endif
}

/*
 * Module teardown; mirror of zvol_init().
 */
void
zvol_fini(void)
{
#ifdef illumos
	mutex_destroy(&zfsdev_state_lock);
#endif
	ddi_soft_state_fini(&zfsdev_state);
	ZFS_LOG(1, "ZVOL Deinitialized.");
}

#ifdef illumos
/*
 * dsl_sync_task check func: non-zero (abort) if MULTI_VDEV_CRASH_DUMP is
 * already active, so the sync func below increments the feature count
 * at most once.
 */
/*ARGSUSED*/
static int
zfs_mvdev_dump_feature_check(void *arg, dmu_tx_t *tx)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	if (spa_feature_is_active(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP))
		return (1);
	return (0);
}

/*
 * dsl_sync_task sync func: mark MULTI_VDEV_CRASH_DUMP active.
 */
/*ARGSUSED*/
static void
zfs_mvdev_dump_activate_feature_sync(void *arg, dmu_tx_t *tx)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	spa_feature_incr(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, tx);
}

static int
zvol_dump_init(zvol_state_t *zv, boolean_t resize)
{
	dmu_tx_t *tx;
	int error;
	objset_t *os = zv->zv_objset;
	spa_t *spa = dmu_objset_spa(os);
	vdev_t *vd = spa->spa_root_vdev;
	nvlist_t *nv = NULL;
	uint64_t version = spa_version(spa);
	uint64_t checksum, compress, refresrv, vbs, dedup;

	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
	ASSERT(vd->vdev_ops == &vdev_root_ops);

	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
	    DMU_OBJECT_END);
	if (error != 0)
		return (error);
	/* wait for dmu_free_long_range to actually free the blocks */
	txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);

	/*
	 * If the pool on which the dump device is being initialized has more
	 * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is
	 * enabled.  If so, bump that feature's counter to indicate that the
	 * feature is active. We also check the vdev type to handle the
	 * following case:
	 *   # zpool create test raidz disk1 disk2 disk3
	 *   Now have spa_root_vdev->vdev_children == 1 (the raidz vdev),
	 *   the raidz vdev itself has 3 children.
	 */
	if (vd->vdev_children > 1 || vd->vdev_ops == &vdev_raidz_ops) {
		if (!spa_feature_is_enabled(spa,
		    SPA_FEATURE_MULTI_VDEV_CRASH_DUMP))
			return (SET_ERROR(ENOTSUP));
		(void) dsl_sync_task(spa_name(spa),
		    zfs_mvdev_dump_feature_check,
		    zfs_mvdev_dump_activate_feature_sync, NULL,
		    2, ZFS_SPACE_CHECK_RESERVED);
	}

	/* Snapshot current property values so undumpify can restore them. */
	if (!resize) {
		error = dsl_prop_get_integer(zv->zv_name,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL);
		if (error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum,
			    NULL);
		}
		if (error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
			    &refresrv, NULL);
		}
		if (error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs,
			    NULL);
		}
		if (version >= SPA_VERSION_DEDUP && error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL);
		}
	}
	if (error != 0)
		return (error);

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	dmu_tx_hold_bonus(tx, ZVOL_OBJ);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error != 0) {
		dmu_tx_abort(tx);
		return (error);
	}

	/*
	 * If we are resizing the dump device then we only need to
	 * update the refreservation to match the newly updated
	 * zvolsize. Otherwise, we save off the original state of the
	 * zvol so that we can restore them if the zvol is ever undumpified.
	 */
	if (resize) {
		error = zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
		    &zv->zv_volsize, tx);
	} else {
		error = zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1,
		    &compress, tx);
		if (error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1,
			    &checksum, tx);
		}
		if (error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
			    &refresrv, tx);
		}
		if (error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
			    &vbs, tx);
		}
		if (error == 0) {
			error = dmu_object_set_blocksize(
			    os, ZVOL_OBJ, SPA_OLD_MAXBLOCKSIZE, 0, tx);
		}
		if (version >= SPA_VERSION_DEDUP && error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1,
			    &dedup, tx);
		}
		if (error == 0)
			zv->zv_volblocksize = SPA_OLD_MAXBLOCKSIZE;
	}
	dmu_tx_commit(tx);

	/*
	 * We only need update the zvol's property if we are initializing
	 * the dump area for the first time.
	 */
	if (error == 0 && !resize) {
		/*
		 * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum
		 * function.  Otherwise, use the old default -- OFF.
		 */
		checksum = spa_feature_is_active(spa,
		    SPA_FEATURE_MULTI_VDEV_CRASH_DUMP) ?
ZIO_CHECKSUM_NOPARITY : 2506 ZIO_CHECKSUM_OFF; 2507 2508 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2509 VERIFY(nvlist_add_uint64(nv, 2510 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); 2511 VERIFY(nvlist_add_uint64(nv, 2512 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 2513 ZIO_COMPRESS_OFF) == 0); 2514 VERIFY(nvlist_add_uint64(nv, 2515 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 2516 checksum) == 0); 2517 if (version >= SPA_VERSION_DEDUP) { 2518 VERIFY(nvlist_add_uint64(nv, 2519 zfs_prop_to_name(ZFS_PROP_DEDUP), 2520 ZIO_CHECKSUM_OFF) == 0); 2521 } 2522 2523 error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 2524 nv, NULL); 2525 nvlist_free(nv); 2526 } 2527 2528 /* Allocate the space for the dump */ 2529 if (error == 0) 2530 error = zvol_prealloc(zv); 2531 return (error); 2532} 2533 2534static int 2535zvol_dumpify(zvol_state_t *zv) 2536{ 2537 int error = 0; 2538 uint64_t dumpsize = 0; 2539 dmu_tx_t *tx; 2540 objset_t *os = zv->zv_objset; 2541 2542 if (zv->zv_flags & ZVOL_RDONLY) 2543 return (SET_ERROR(EROFS)); 2544 2545 if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 2546 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { 2547 boolean_t resize = (dumpsize > 0); 2548 2549 if ((error = zvol_dump_init(zv, resize)) != 0) { 2550 (void) zvol_dump_fini(zv); 2551 return (error); 2552 } 2553 } 2554 2555 /* 2556 * Build up our lba mapping. 
2557 */ 2558 error = zvol_get_lbas(zv); 2559 if (error) { 2560 (void) zvol_dump_fini(zv); 2561 return (error); 2562 } 2563 2564 tx = dmu_tx_create(os); 2565 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 2566 error = dmu_tx_assign(tx, TXG_WAIT); 2567 if (error) { 2568 dmu_tx_abort(tx); 2569 (void) zvol_dump_fini(zv); 2570 return (error); 2571 } 2572 2573 zv->zv_flags |= ZVOL_DUMPIFIED; 2574 error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, 2575 &zv->zv_volsize, tx); 2576 dmu_tx_commit(tx); 2577 2578 if (error) { 2579 (void) zvol_dump_fini(zv); 2580 return (error); 2581 } 2582 2583 txg_wait_synced(dmu_objset_pool(os), 0); 2584 return (0); 2585} 2586 2587static int 2588zvol_dump_fini(zvol_state_t *zv) 2589{ 2590 dmu_tx_t *tx; 2591 objset_t *os = zv->zv_objset; 2592 nvlist_t *nv; 2593 int error = 0; 2594 uint64_t checksum, compress, refresrv, vbs, dedup; 2595 uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); 2596 2597 /* 2598 * Attempt to restore the zvol back to its pre-dumpified state. 2599 * This is a best-effort attempt as it's possible that not all 2600 * of these properties were initialized during the dumpify process 2601 * (i.e. error during zvol_dump_init). 
2602 */ 2603 2604 tx = dmu_tx_create(os); 2605 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 2606 error = dmu_tx_assign(tx, TXG_WAIT); 2607 if (error) { 2608 dmu_tx_abort(tx); 2609 return (error); 2610 } 2611 (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); 2612 dmu_tx_commit(tx); 2613 2614 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2615 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); 2616 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2617 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); 2618 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2619 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); 2620 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2621 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs); 2622 2623 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2624 (void) nvlist_add_uint64(nv, 2625 zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); 2626 (void) nvlist_add_uint64(nv, 2627 zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); 2628 (void) nvlist_add_uint64(nv, 2629 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); 2630 if (version >= SPA_VERSION_DEDUP && 2631 zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2632 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) { 2633 (void) nvlist_add_uint64(nv, 2634 zfs_prop_to_name(ZFS_PROP_DEDUP), dedup); 2635 } 2636 (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 2637 nv, NULL); 2638 nvlist_free(nv); 2639 2640 zvol_free_extents(zv); 2641 zv->zv_flags &= ~ZVOL_DUMPIFIED; 2642 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); 2643 /* wait for dmu_free_long_range to actually free the blocks */ 2644 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 2645 tx = dmu_tx_create(os); 2646 dmu_tx_hold_bonus(tx, ZVOL_OBJ); 2647 error = dmu_tx_assign(tx, TXG_WAIT); 2648 if (error) { 2649 dmu_tx_abort(tx); 2650 return (error); 2651 } 2652 if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0) 2653 zv->zv_volblocksize = vbs; 2654 dmu_tx_commit(tx); 2655 2656 
return (0); 2657} 2658#else /* !illumos */ 2659 2660static void 2661zvol_geom_run(zvol_state_t *zv) 2662{ 2663 struct g_provider *pp; 2664 2665 pp = zv->zv_provider; 2666 g_error_provider(pp, 0); 2667 2668 kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0, 2669 "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER)); 2670} 2671 2672static void 2673zvol_geom_destroy(zvol_state_t *zv) 2674{ 2675 struct g_provider *pp; 2676 2677 g_topology_assert(); 2678 2679 mtx_lock(&zv->zv_queue_mtx); 2680 zv->zv_state = 1; 2681 wakeup_one(&zv->zv_queue); 2682 while (zv->zv_state != 2) 2683 msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0); 2684 mtx_destroy(&zv->zv_queue_mtx); 2685 2686 pp = zv->zv_provider; 2687 zv->zv_provider = NULL; 2688 pp->private = NULL; 2689 g_wither_geom(pp->geom, ENXIO); 2690} 2691 2692static int 2693zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace) 2694{ 2695 int count, error, flags; 2696 2697 g_topology_assert(); 2698 2699 /* 2700 * To make it easier we expect either open or close, but not both 2701 * at the same time. 2702 */ 2703 KASSERT((acr >= 0 && acw >= 0 && ace >= 0) || 2704 (acr <= 0 && acw <= 0 && ace <= 0), 2705 ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).", 2706 pp->name, acr, acw, ace)); 2707 2708 if (pp->private == NULL) { 2709 if (acr <= 0 && acw <= 0 && ace <= 0) 2710 return (0); 2711 return (pp->error); 2712 } 2713 2714 /* 2715 * We don't pass FEXCL flag to zvol_open()/zvol_close() if ace != 0, 2716 * because GEOM already handles that and handles it a bit differently. 2717 * GEOM allows for multiple read/exclusive consumers and ZFS allows 2718 * only one exclusive consumer, no matter if it is reader or writer. 2719 * I like better the way GEOM works so I'll leave it for GEOM to 2720 * decide what to do. 
2721 */ 2722 2723 count = acr + acw + ace; 2724 if (count == 0) 2725 return (0); 2726 2727 flags = 0; 2728 if (acr != 0 || ace != 0) 2729 flags |= FREAD; 2730 if (acw != 0) 2731 flags |= FWRITE; 2732 2733 g_topology_unlock(); 2734 if (count > 0) 2735 error = zvol_open(pp, flags, count); 2736 else 2737 error = zvol_close(pp, flags, -count); 2738 g_topology_lock(); 2739 return (error); 2740} 2741 2742static void 2743zvol_geom_start(struct bio *bp) 2744{ 2745 zvol_state_t *zv; 2746 boolean_t first; 2747 2748 zv = bp->bio_to->private; 2749 ASSERT(zv != NULL); 2750 switch (bp->bio_cmd) { 2751 case BIO_FLUSH: 2752 if (!THREAD_CAN_SLEEP()) 2753 goto enqueue; 2754 zil_commit(zv->zv_zilog, ZVOL_OBJ); 2755 g_io_deliver(bp, 0); 2756 break; 2757 case BIO_READ: 2758 case BIO_WRITE: 2759 case BIO_DELETE: 2760 if (!THREAD_CAN_SLEEP()) 2761 goto enqueue; 2762 zvol_strategy(bp); 2763 break; 2764 case BIO_GETATTR: { 2765 spa_t *spa = dmu_objset_spa(zv->zv_objset); 2766 uint64_t refd, avail, usedobjs, availobjs, val; 2767 2768 if (g_handleattr_int(bp, "GEOM::candelete", 1)) 2769 return; 2770 if (strcmp(bp->bio_attribute, "blocksavail") == 0) { 2771 dmu_objset_space(zv->zv_objset, &refd, &avail, 2772 &usedobjs, &availobjs); 2773 if (g_handleattr_off_t(bp, "blocksavail", 2774 avail / DEV_BSIZE)) 2775 return; 2776 } else if (strcmp(bp->bio_attribute, "blocksused") == 0) { 2777 dmu_objset_space(zv->zv_objset, &refd, &avail, 2778 &usedobjs, &availobjs); 2779 if (g_handleattr_off_t(bp, "blocksused", 2780 refd / DEV_BSIZE)) 2781 return; 2782 } else if (strcmp(bp->bio_attribute, "poolblocksavail") == 0) { 2783 avail = metaslab_class_get_space(spa_normal_class(spa)); 2784 avail -= metaslab_class_get_alloc(spa_normal_class(spa)); 2785 if (g_handleattr_off_t(bp, "poolblocksavail", 2786 avail / DEV_BSIZE)) 2787 return; 2788 } else if (strcmp(bp->bio_attribute, "poolblocksused") == 0) { 2789 refd = metaslab_class_get_alloc(spa_normal_class(spa)); 2790 if (g_handleattr_off_t(bp, "poolblocksused", 
2791 refd / DEV_BSIZE)) 2792 return; 2793 } 2794 /* FALLTHROUGH */ 2795 } 2796 default: 2797 g_io_deliver(bp, EOPNOTSUPP); 2798 break; 2799 } 2800 return; 2801 2802enqueue: 2803 mtx_lock(&zv->zv_queue_mtx); 2804 first = (bioq_first(&zv->zv_queue) == NULL); 2805 bioq_insert_tail(&zv->zv_queue, bp); 2806 mtx_unlock(&zv->zv_queue_mtx); 2807 if (first) 2808 wakeup_one(&zv->zv_queue); 2809} 2810 2811static void 2812zvol_geom_worker(void *arg) 2813{ 2814 zvol_state_t *zv; 2815 struct bio *bp; 2816 2817 thread_lock(curthread); 2818 sched_prio(curthread, PRIBIO); 2819 thread_unlock(curthread); 2820 2821 zv = arg; 2822 for (;;) { 2823 mtx_lock(&zv->zv_queue_mtx); 2824 bp = bioq_takefirst(&zv->zv_queue); 2825 if (bp == NULL) { 2826 if (zv->zv_state == 1) { 2827 zv->zv_state = 2; 2828 wakeup(&zv->zv_state); 2829 mtx_unlock(&zv->zv_queue_mtx); 2830 kthread_exit(); 2831 } 2832 msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP, 2833 "zvol:io", 0); 2834 continue; 2835 } 2836 mtx_unlock(&zv->zv_queue_mtx); 2837 switch (bp->bio_cmd) { 2838 case BIO_FLUSH: 2839 zil_commit(zv->zv_zilog, ZVOL_OBJ); 2840 g_io_deliver(bp, 0); 2841 break; 2842 case BIO_READ: 2843 case BIO_WRITE: 2844 case BIO_DELETE: 2845 zvol_strategy(bp); 2846 break; 2847 default: 2848 g_io_deliver(bp, EOPNOTSUPP); 2849 break; 2850 } 2851 } 2852} 2853 2854extern boolean_t dataset_name_hidden(const char *name); 2855 2856static int 2857zvol_create_snapshots(objset_t *os, const char *name) 2858{ 2859 uint64_t cookie, obj; 2860 char *sname; 2861 int error, len; 2862 2863 cookie = obj = 0; 2864 sname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2865 2866#if 0 2867 (void) dmu_objset_find(name, dmu_objset_prefetch, NULL, 2868 DS_FIND_SNAPSHOTS); 2869#endif 2870 2871 for (;;) { 2872 len = snprintf(sname, MAXPATHLEN, "%s@", name); 2873 if (len >= MAXPATHLEN) { 2874 dmu_objset_rele(os, FTAG); 2875 error = ENAMETOOLONG; 2876 break; 2877 } 2878 2879 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 2880 error = 
dmu_snapshot_list_next(os, MAXPATHLEN - len, 2881 sname + len, &obj, &cookie, NULL); 2882 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 2883 if (error != 0) { 2884 if (error == ENOENT) 2885 error = 0; 2886 break; 2887 } 2888 2889 error = zvol_create_minor(sname); 2890 if (error != 0 && error != EEXIST) { 2891 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 2892 sname, error); 2893 break; 2894 } 2895 } 2896 2897 kmem_free(sname, MAXPATHLEN); 2898 return (error); 2899} 2900 2901int 2902zvol_create_minors(const char *name) 2903{ 2904 uint64_t cookie; 2905 objset_t *os; 2906 char *osname, *p; 2907 int error, len; 2908 2909 if (dataset_name_hidden(name)) 2910 return (0); 2911 2912 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 2913 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 2914 name, error); 2915 return (error); 2916 } 2917 if (dmu_objset_type(os) == DMU_OST_ZVOL) { 2918 dsl_dataset_long_hold(os->os_dsl_dataset, FTAG); 2919 dsl_pool_rele(dmu_objset_pool(os), FTAG); 2920 error = zvol_create_minor(name); 2921 if (error == 0 || error == EEXIST) { 2922 error = zvol_create_snapshots(os, name); 2923 } else { 2924 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 2925 name, error); 2926 } 2927 dsl_dataset_long_rele(os->os_dsl_dataset, FTAG); 2928 dsl_dataset_rele(os->os_dsl_dataset, FTAG); 2929 return (error); 2930 } 2931 if (dmu_objset_type(os) != DMU_OST_ZFS) { 2932 dmu_objset_rele(os, FTAG); 2933 return (0); 2934 } 2935 2936 osname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2937 if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) { 2938 dmu_objset_rele(os, FTAG); 2939 kmem_free(osname, MAXPATHLEN); 2940 return (ENOENT); 2941 } 2942 p = osname + strlen(osname); 2943 len = MAXPATHLEN - (p - osname); 2944 2945#if 0 2946 /* Prefetch the datasets. 
*/ 2947 cookie = 0; 2948 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { 2949 if (!dataset_name_hidden(osname)) 2950 (void) dmu_objset_prefetch(osname, NULL); 2951 } 2952#endif 2953 2954 cookie = 0; 2955 while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL, 2956 &cookie) == 0) { 2957 dmu_objset_rele(os, FTAG); 2958 (void)zvol_create_minors(osname); 2959 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 2960 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 2961 name, error); 2962 return (error); 2963 } 2964 } 2965 2966 dmu_objset_rele(os, FTAG); 2967 kmem_free(osname, MAXPATHLEN); 2968 return (0); 2969} 2970 2971static void 2972zvol_rename_minor(zvol_state_t *zv, const char *newname) 2973{ 2974 struct g_geom *gp; 2975 struct g_provider *pp; 2976 struct cdev *dev; 2977 2978 ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 2979 2980 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 2981 g_topology_lock(); 2982 pp = zv->zv_provider; 2983 ASSERT(pp != NULL); 2984 gp = pp->geom; 2985 ASSERT(gp != NULL); 2986 2987 zv->zv_provider = NULL; 2988 g_wither_provider(pp, ENXIO); 2989 2990 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname); 2991 pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 2992 pp->sectorsize = DEV_BSIZE; 2993 pp->mediasize = zv->zv_volsize; 2994 pp->private = zv; 2995 zv->zv_provider = pp; 2996 g_error_provider(pp, 0); 2997 g_topology_unlock(); 2998 } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 2999 struct make_dev_args args; 3000 3001 if ((dev = zv->zv_dev) != NULL) { 3002 zv->zv_dev = NULL; 3003 destroy_dev(dev); 3004 if (zv->zv_total_opens > 0) { 3005 zv->zv_flags &= ~ZVOL_EXCL; 3006 zv->zv_total_opens = 0; 3007 zvol_last_close(zv); 3008 } 3009 } 3010 3011 make_dev_args_init(&args); 3012 args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 3013 args.mda_devsw = &zvol_cdevsw; 3014 args.mda_cr = NULL; 3015 args.mda_uid = UID_ROOT; 3016 args.mda_gid = GID_OPERATOR; 3017 args.mda_mode = 0640; 3018 args.mda_si_drv2 = zv; 3019 
if (make_dev_s(&args, &zv->zv_dev, 3020 "%s/%s", ZVOL_DRIVER, newname) == 0) 3021 zv->zv_dev->si_iosize_max = MAXPHYS; 3022 } 3023 strlcpy(zv->zv_name, newname, sizeof(zv->zv_name)); 3024} 3025 3026void 3027zvol_rename_minors(const char *oldname, const char *newname) 3028{ 3029 char name[MAXPATHLEN]; 3030 struct g_provider *pp; 3031 struct g_geom *gp; 3032 size_t oldnamelen, newnamelen; 3033 zvol_state_t *zv; 3034 char *namebuf; 3035 boolean_t locked = B_FALSE; 3036 3037 oldnamelen = strlen(oldname); 3038 newnamelen = strlen(newname); 3039 3040 DROP_GIANT(); 3041 /* See comment in zvol_open(). */ 3042 if (!MUTEX_HELD(&zfsdev_state_lock)) { 3043 mutex_enter(&zfsdev_state_lock); 3044 locked = B_TRUE; 3045 } 3046 3047 LIST_FOREACH(zv, &all_zvols, zv_links) { 3048 if (strcmp(zv->zv_name, oldname) == 0) { 3049 zvol_rename_minor(zv, newname); 3050 } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && 3051 (zv->zv_name[oldnamelen] == '/' || 3052 zv->zv_name[oldnamelen] == '@')) { 3053 snprintf(name, sizeof(name), "%s%c%s", newname, 3054 zv->zv_name[oldnamelen], 3055 zv->zv_name + oldnamelen + 1); 3056 zvol_rename_minor(zv, name); 3057 } 3058 } 3059 3060 if (locked) 3061 mutex_exit(&zfsdev_state_lock); 3062 PICKUP_GIANT(); 3063} 3064 3065static int 3066zvol_d_open(struct cdev *dev, int flags, int fmt, struct thread *td) 3067{ 3068 zvol_state_t *zv = dev->si_drv2; 3069 int err = 0; 3070 3071 mutex_enter(&zfsdev_state_lock); 3072 if (zv->zv_total_opens == 0) 3073 err = zvol_first_open(zv); 3074 if (err) { 3075 mutex_exit(&zfsdev_state_lock); 3076 return (err); 3077 } 3078 if ((flags & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 3079 err = SET_ERROR(EROFS); 3080 goto out; 3081 } 3082 if (zv->zv_flags & ZVOL_EXCL) { 3083 err = SET_ERROR(EBUSY); 3084 goto out; 3085 } 3086#ifdef FEXCL 3087 if (flags & FEXCL) { 3088 if (zv->zv_total_opens != 0) { 3089 err = SET_ERROR(EBUSY); 3090 goto out; 3091 } 3092 zv->zv_flags |= ZVOL_EXCL; 3093 } 3094#endif 3095 3096 
zv->zv_total_opens++; 3097 if (flags & (FSYNC | FDSYNC)) { 3098 zv->zv_sync_cnt++; 3099 if (zv->zv_sync_cnt == 1) 3100 zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ); 3101 } 3102 mutex_exit(&zfsdev_state_lock); 3103 return (err); 3104out: 3105 if (zv->zv_total_opens == 0) 3106 zvol_last_close(zv); 3107 mutex_exit(&zfsdev_state_lock); 3108 return (err); 3109} 3110 3111static int 3112zvol_d_close(struct cdev *dev, int flags, int fmt, struct thread *td) 3113{ 3114 zvol_state_t *zv = dev->si_drv2; 3115 3116 mutex_enter(&zfsdev_state_lock); 3117 if (zv->zv_flags & ZVOL_EXCL) { 3118 ASSERT(zv->zv_total_opens == 1); 3119 zv->zv_flags &= ~ZVOL_EXCL; 3120 } 3121 3122 /* 3123 * If the open count is zero, this is a spurious close. 3124 * That indicates a bug in the kernel / DDI framework. 3125 */ 3126 ASSERT(zv->zv_total_opens != 0); 3127 3128 /* 3129 * You may get multiple opens, but only one close. 3130 */ 3131 zv->zv_total_opens--; 3132 if (flags & (FSYNC | FDSYNC)) 3133 zv->zv_sync_cnt--; 3134 3135 if (zv->zv_total_opens == 0) 3136 zvol_last_close(zv); 3137 3138 mutex_exit(&zfsdev_state_lock); 3139 return (0); 3140} 3141 3142static int 3143zvol_d_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) 3144{ 3145 zvol_state_t *zv; 3146 rl_t *rl; 3147 off_t offset, length; 3148 int i, error; 3149 boolean_t sync; 3150 3151 zv = dev->si_drv2; 3152 3153 error = 0; 3154 KASSERT(zv->zv_total_opens > 0, 3155 ("Device with zero access count in zvol_d_ioctl")); 3156 3157 i = IOCPARM_LEN(cmd); 3158 switch (cmd) { 3159 case DIOCGSECTORSIZE: 3160 *(u_int *)data = DEV_BSIZE; 3161 break; 3162 case DIOCGMEDIASIZE: 3163 *(off_t *)data = zv->zv_volsize; 3164 break; 3165 case DIOCGFLUSH: 3166 zil_commit(zv->zv_zilog, ZVOL_OBJ); 3167 break; 3168 case DIOCGDELETE: 3169 if (!zvol_unmap_enabled) 3170 break; 3171 3172 offset = ((off_t *)data)[0]; 3173 length = ((off_t *)data)[1]; 3174 if ((offset % DEV_BSIZE) != 0 || (length % DEV_BSIZE) != 0 || 3175 offset < 0 || offset >= 
zv->zv_volsize || 3176 length <= 0) { 3177 printf("%s: offset=%jd length=%jd\n", __func__, offset, 3178 length); 3179 error = EINVAL; 3180 break; 3181 } 3182 3183 rl = zfs_range_lock(&zv->zv_znode, offset, length, RL_WRITER); 3184 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 3185 error = dmu_tx_assign(tx, TXG_WAIT); 3186 if (error != 0) { 3187 sync = FALSE; 3188 dmu_tx_abort(tx); 3189 } else { 3190 sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 3191 zvol_log_truncate(zv, tx, offset, length, sync); 3192 dmu_tx_commit(tx); 3193 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 3194 offset, length); 3195 } 3196 zfs_range_unlock(rl); 3197 if (sync) 3198 zil_commit(zv->zv_zilog, ZVOL_OBJ); 3199 break; 3200 case DIOCGSTRIPESIZE: 3201 *(off_t *)data = zv->zv_volblocksize; 3202 break; 3203 case DIOCGSTRIPEOFFSET: 3204 *(off_t *)data = 0; 3205 break; 3206 case DIOCGATTR: { 3207 spa_t *spa = dmu_objset_spa(zv->zv_objset); 3208 struct diocgattr_arg *arg = (struct diocgattr_arg *)data; 3209 uint64_t refd, avail, usedobjs, availobjs; 3210 3211 if (strcmp(arg->name, "GEOM::candelete") == 0) 3212 arg->value.i = 1; 3213 else if (strcmp(arg->name, "blocksavail") == 0) { 3214 dmu_objset_space(zv->zv_objset, &refd, &avail, 3215 &usedobjs, &availobjs); 3216 arg->value.off = avail / DEV_BSIZE; 3217 } else if (strcmp(arg->name, "blocksused") == 0) { 3218 dmu_objset_space(zv->zv_objset, &refd, &avail, 3219 &usedobjs, &availobjs); 3220 arg->value.off = refd / DEV_BSIZE; 3221 } else if (strcmp(arg->name, "poolblocksavail") == 0) { 3222 avail = metaslab_class_get_space(spa_normal_class(spa)); 3223 avail -= metaslab_class_get_alloc(spa_normal_class(spa)); 3224 arg->value.off = avail / DEV_BSIZE; 3225 } else if (strcmp(arg->name, "poolblocksused") == 0) { 3226 refd = metaslab_class_get_alloc(spa_normal_class(spa)); 3227 arg->value.off = refd / DEV_BSIZE; 3228 } else 3229 error = ENOIOCTL; 3230 break; 3231 } 3232 case FIOSEEKHOLE: 3233 case FIOSEEKDATA: { 3234 off_t *off = (off_t 
*)data; 3235 uint64_t noff; 3236 boolean_t hole; 3237 3238 hole = (cmd == FIOSEEKHOLE); 3239 noff = *off; 3240 error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); 3241 *off = noff; 3242 break; 3243 } 3244 default: 3245 error = ENOIOCTL; 3246 } 3247 3248 return (error); 3249} 3250#endif /* illumos */ 3251