zvol.c revision 272883
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * 24 * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 25 * All rights reserved. 26 * 27 * Portions Copyright 2010 Robert Milkowski 28 * 29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 30 * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 31 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 32 */ 33 34/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 35 36/* 37 * ZFS volume emulation driver. 38 * 39 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 40 * Volumes are accessed through the symbolic links named: 41 * 42 * /dev/zvol/dsk/<pool_name>/<dataset_name> 43 * /dev/zvol/rdsk/<pool_name>/<dataset_name> 44 * 45 * These links are created by the /dev filesystem (sdev_zvolops.c). 46 * Volumes are persistent through reboot. No user command needs to be 47 * run before opening and using a device. 48 * 49 * FreeBSD notes. 50 * On FreeBSD ZVOLs are simply GEOM providers like any other storage device 51 * in the system. 52 */ 53 54#include <sys/types.h> 55#include <sys/param.h> 56#include <sys/kernel.h> 57#include <sys/errno.h> 58#include <sys/uio.h> 59#include <sys/bio.h> 60#include <sys/buf.h> 61#include <sys/kmem.h> 62#include <sys/conf.h> 63#include <sys/cmn_err.h> 64#include <sys/stat.h> 65#include <sys/zap.h> 66#include <sys/spa.h> 67#include <sys/spa_impl.h> 68#include <sys/zio.h> 69#include <sys/disk.h> 70#include <sys/dmu_traverse.h> 71#include <sys/dnode.h> 72#include <sys/dsl_dataset.h> 73#include <sys/dsl_prop.h> 74#include <sys/dkio.h> 75#include <sys/byteorder.h> 76#include <sys/sunddi.h> 77#include <sys/dirent.h> 78#include <sys/policy.h> 79#include <sys/queue.h> 80#include <sys/fs/zfs.h> 81#include <sys/zfs_ioctl.h> 82#include <sys/zil.h> 83#include <sys/refcount.h> 84#include <sys/zfs_znode.h> 85#include <sys/zfs_rlock.h> 86#include <sys/vdev_impl.h> 87#include <sys/vdev_raidz.h> 88#include <sys/zvol.h> 89#include <sys/zil_impl.h> 90#include <sys/dbuf.h> 91#include <sys/dmu_tx.h> 92#include <sys/zfeature.h> 93#include <sys/zio_checksum.h> 94 95#include <geom/geom.h> 96 97#include "zfs_namecheck.h" 98 99struct g_class zfs_zvol_class = { 100 .name = "ZFS::ZVOL", 101 .version = G_VERSION, 102}; 103 104DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); 105 106void *zfsdev_state; 107static char *zvol_tag = "zvol_tag"; 108 109#define ZVOL_DUMPSIZE "dumpsize" 110 111/* 112 * The spa_namespace_lock protects the zfsdev_state structure from being 113 * modified while it's being used, e.g. an open that comes in before a 114 * create finishes. It also protects temporary opens of the dataset so that, 115 * e.g., an open doesn't get a spurious EBUSY. 116 */ 117static uint32_t zvol_minors; 118 119SYSCTL_DECL(_vfs_zfs); 120SYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME"); 121static int volmode = ZFS_VOLMODE_GEOM; 122TUNABLE_INT("vfs.zfs.vol.mode", &volmode); 123SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &volmode, 0, 124 "Expose as GEOM providers (1), device files (2) or neither"); 125 126typedef struct zvol_extent { 127 list_node_t ze_node; 128 dva_t ze_dva; /* dva associated with this extent */ 129 uint64_t ze_nblks; /* number of blocks in extent */ 130} zvol_extent_t; 131 132/* 133 * The in-core state of each volume. 134 */ 135typedef struct zvol_state { 136 LIST_ENTRY(zvol_state) zv_links; 137 char zv_name[MAXPATHLEN]; /* pool/dd name */ 138 uint64_t zv_volsize; /* amount of space we advertise */ 139 uint64_t zv_volblocksize; /* volume block size */ 140 struct cdev *zv_dev; /* non-GEOM device */ 141 struct g_provider *zv_provider; /* GEOM provider */ 142 uint8_t zv_min_bs; /* minimum addressable block shift */ 143 uint8_t zv_flags; /* readonly, dumpified, etc. */ 144 objset_t *zv_objset; /* objset handle */ 145 uint32_t zv_total_opens; /* total open count */ 146 zilog_t *zv_zilog; /* ZIL handle */ 147 list_t zv_extents; /* List of extents for dump */ 148 znode_t zv_znode; /* for range locking */ 149 dmu_buf_t *zv_dbuf; /* bonus handle */ 150 int zv_state; 151 int zv_volmode; /* Provide GEOM or cdev */ 152 struct bio_queue_head zv_queue; 153 struct mtx zv_queue_mtx; /* zv_queue mutex */ 154} zvol_state_t; 155 156static LIST_HEAD(, zvol_state) all_zvols; 157 158/* 159 * zvol specific flags 160 */ 161#define ZVOL_RDONLY 0x1 162#define ZVOL_DUMPIFIED 0x2 163#define ZVOL_EXCL 0x4 164#define ZVOL_WCE 0x8 165 166/* 167 * zvol maximum transfer in one DMU tx. 168 */ 169int zvol_maxphys = DMU_MAX_ACCESS/2; 170 171static d_open_t zvol_d_open; 172static d_close_t zvol_d_close; 173static d_read_t zvol_read; 174static d_write_t zvol_write; 175static d_ioctl_t zvol_d_ioctl; 176static d_strategy_t zvol_strategy; 177 178static struct cdevsw zvol_cdevsw = { 179 .d_version = D_VERSION, 180 .d_open = zvol_d_open, 181 .d_close = zvol_d_close, 182 .d_read = zvol_read, 183 .d_write = zvol_write, 184 .d_ioctl = zvol_d_ioctl, 185 .d_strategy = zvol_strategy, 186 .d_name = "zvol", 187 .d_flags = D_DISK | D_TRACKCLOSE, 188}; 189 190extern int zfs_set_prop_nvlist(const char *, zprop_source_t, 191 nvlist_t *, nvlist_t *); 192static void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, 193 uint64_t len, boolean_t sync); 194static int zvol_remove_zv(zvol_state_t *); 195static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); 196static int zvol_dumpify(zvol_state_t *zv); 197static int zvol_dump_fini(zvol_state_t *zv); 198static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); 199 200static void zvol_geom_run(zvol_state_t *zv); 201static void zvol_geom_destroy(zvol_state_t *zv); 202static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); 203static void zvol_geom_start(struct bio *bp); 204static void zvol_geom_worker(void *arg); 205 206static void 207zvol_size_changed(zvol_state_t *zv) 208{ 209#ifdef sun 210 dev_t dev = makedevice(maj, min); 211 212 VERIFY(ddi_prop_update_int64(dev, zfs_dip, 213 "Size", volsize) == DDI_SUCCESS); 214 VERIFY(ddi_prop_update_int64(dev, zfs_dip, 215 "Nblocks", lbtodb(volsize)) == DDI_SUCCESS); 216 217 /* Notify specfs to invalidate the cached size */ 218 spec_size_invalidate(dev, VBLK); 219 spec_size_invalidate(dev, VCHR); 220#else /* !sun */ 221 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 222 struct g_provider *pp; 223 224 pp = zv->zv_provider; 225 if (pp == NULL) 226 return; 227 g_topology_lock(); 228 g_resize_provider(pp, zv->zv_volsize); 229 g_topology_unlock(); 230 } 231#endif /* !sun */ 232} 233 234int 235zvol_check_volsize(uint64_t volsize, uint64_t blocksize) 236{ 237 if (volsize == 0) 238 return (SET_ERROR(EINVAL)); 239 240 if (volsize % blocksize != 0) 241 return (SET_ERROR(EINVAL)); 242 243#ifdef _ILP32 244 if (volsize - 1 > SPEC_MAXOFFSET_T) 245 return (SET_ERROR(EOVERFLOW)); 246#endif 247 return (0); 248} 249 250int 251zvol_check_volblocksize(uint64_t volblocksize) 252{ 253 if (volblocksize < SPA_MINBLOCKSIZE || 254 volblocksize > SPA_MAXBLOCKSIZE || 255 !ISP2(volblocksize)) 256 return (SET_ERROR(EDOM)); 257 258 return (0); 259} 260 261int 262zvol_get_stats(objset_t *os, nvlist_t *nv) 263{ 264 int error; 265 dmu_object_info_t doi; 266 uint64_t val; 267 268 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); 269 if (error) 270 return (error); 271 272 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); 273 274 error = dmu_object_info(os, ZVOL_OBJ, &doi); 275 276 if (error == 0) { 277 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, 278 doi.doi_data_block_size); 279 } 280 281 return (error); 282} 283 284static zvol_state_t * 285zvol_minor_lookup(const char *name) 286{ 287 zvol_state_t *zv; 288 289 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 290 291 LIST_FOREACH(zv, &all_zvols, zv_links) { 292 if (strcmp(zv->zv_name, name) == 0) 293 break; 294 } 295 296 return (zv); 297} 298 299/* extent mapping arg */ 300struct maparg { 301 zvol_state_t *ma_zv; 302 uint64_t ma_blks; 303}; 304 305/*ARGSUSED*/ 306static int 307zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 308 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 309{ 310 struct maparg *ma = arg; 311 zvol_extent_t *ze; 312 int bs = ma->ma_zv->zv_volblocksize; 313 314 if (BP_IS_HOLE(bp) || 315 zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) 316 return (0); 317 318 VERIFY(!BP_IS_EMBEDDED(bp)); 319 320 VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); 321 ma->ma_blks++; 322 323 /* Abort immediately if we have encountered gang blocks */ 324 if (BP_IS_GANG(bp)) 325 return (SET_ERROR(EFRAGS)); 326 327 /* 328 * See if the block is at the end of the previous extent. 329 */ 330 ze = list_tail(&ma->ma_zv->zv_extents); 331 if (ze && 332 DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && 333 DVA_GET_OFFSET(BP_IDENTITY(bp)) == 334 DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { 335 ze->ze_nblks++; 336 return (0); 337 } 338 339 dprintf_bp(bp, "%s", "next blkptr:"); 340 341 /* start a new extent */ 342 ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); 343 ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 344 ze->ze_nblks = 1; 345 list_insert_tail(&ma->ma_zv->zv_extents, ze); 346 return (0); 347} 348 349static void 350zvol_free_extents(zvol_state_t *zv) 351{ 352 zvol_extent_t *ze; 353 354 while (ze = list_head(&zv->zv_extents)) { 355 list_remove(&zv->zv_extents, ze); 356 kmem_free(ze, sizeof (zvol_extent_t)); 357 } 358} 359 360static int 361zvol_get_lbas(zvol_state_t *zv) 362{ 363 objset_t *os = zv->zv_objset; 364 struct maparg ma; 365 int err; 366 367 ma.ma_zv = zv; 368 ma.ma_blks = 0; 369 zvol_free_extents(zv); 370 371 /* commit any in-flight changes before traversing the dataset */ 372 txg_wait_synced(dmu_objset_pool(os), 0); 373 err = traverse_dataset(dmu_objset_ds(os), 0, 374 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); 375 if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { 376 zvol_free_extents(zv); 377 return (err ? err : EIO); 378 } 379 380 return (0); 381} 382 383/* ARGSUSED */ 384void 385zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 386{ 387 zfs_creat_t *zct = arg; 388 nvlist_t *nvprops = zct->zct_props; 389 int error; 390 uint64_t volblocksize, volsize; 391 392 VERIFY(nvlist_lookup_uint64(nvprops, 393 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 394 if (nvlist_lookup_uint64(nvprops, 395 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 396 volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 397 398 /* 399 * These properties must be removed from the list so the generic 400 * property setting step won't apply to them. 401 */ 402 VERIFY(nvlist_remove_all(nvprops, 403 zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 404 (void) nvlist_remove_all(nvprops, 405 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 406 407 error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 408 DMU_OT_NONE, 0, tx); 409 ASSERT(error == 0); 410 411 error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 412 DMU_OT_NONE, 0, tx); 413 ASSERT(error == 0); 414 415 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 416 ASSERT(error == 0); 417} 418 419/* 420 * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we 421 * implement DKIOCFREE/free-long-range. 422 */ 423static int 424zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap) 425{ 426 uint64_t offset, length; 427 428 if (byteswap) 429 byteswap_uint64_array(lr, sizeof (*lr)); 430 431 offset = lr->lr_offset; 432 length = lr->lr_length; 433 434 return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length)); 435} 436 437/* 438 * Replay a TX_WRITE ZIL transaction that didn't get committed 439 * after a system failure 440 */ 441static int 442zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 443{ 444 objset_t *os = zv->zv_objset; 445 char *data = (char *)(lr + 1); /* data follows lr_write_t */ 446 uint64_t offset, length; 447 dmu_tx_t *tx; 448 int error; 449 450 if (byteswap) 451 byteswap_uint64_array(lr, sizeof (*lr)); 452 453 offset = lr->lr_offset; 454 length = lr->lr_length; 455 456 /* If it's a dmu_sync() block, write the whole block */ 457 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 458 uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 459 if (length < blocksize) { 460 offset -= offset % blocksize; 461 length = blocksize; 462 } 463 } 464 465 tx = dmu_tx_create(os); 466 dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); 467 error = dmu_tx_assign(tx, TXG_WAIT); 468 if (error) { 469 dmu_tx_abort(tx); 470 } else { 471 dmu_write(os, ZVOL_OBJ, offset, length, data, tx); 472 dmu_tx_commit(tx); 473 } 474 475 return (error); 476} 477 478/* ARGSUSED */ 479static int 480zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 481{ 482 return (SET_ERROR(ENOTSUP)); 483} 484 485/* 486 * Callback vectors for replaying records. 487 * Only TX_WRITE and TX_TRUNCATE are needed for zvol. 488 */ 489zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 490 zvol_replay_err, /* 0 no such transaction type */ 491 zvol_replay_err, /* TX_CREATE */ 492 zvol_replay_err, /* TX_MKDIR */ 493 zvol_replay_err, /* TX_MKXATTR */ 494 zvol_replay_err, /* TX_SYMLINK */ 495 zvol_replay_err, /* TX_REMOVE */ 496 zvol_replay_err, /* TX_RMDIR */ 497 zvol_replay_err, /* TX_LINK */ 498 zvol_replay_err, /* TX_RENAME */ 499 zvol_replay_write, /* TX_WRITE */ 500 zvol_replay_truncate, /* TX_TRUNCATE */ 501 zvol_replay_err, /* TX_SETATTR */ 502 zvol_replay_err, /* TX_ACL */ 503 zvol_replay_err, /* TX_CREATE_ACL */ 504 zvol_replay_err, /* TX_CREATE_ATTR */ 505 zvol_replay_err, /* TX_CREATE_ACL_ATTR */ 506 zvol_replay_err, /* TX_MKDIR_ACL */ 507 zvol_replay_err, /* TX_MKDIR_ATTR */ 508 zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ 509 zvol_replay_err, /* TX_WRITE2 */ 510}; 511 512#ifdef sun 513int 514zvol_name2minor(const char *name, minor_t *minor) 515{ 516 zvol_state_t *zv; 517 518 mutex_enter(&spa_namespace_lock); 519 zv = zvol_minor_lookup(name); 520 if (minor && zv) 521 *minor = zv->zv_minor; 522 mutex_exit(&spa_namespace_lock); 523 return (zv ? 0 : -1); 524} 525#endif /* sun */ 526 527/* 528 * Create a minor node (plus a whole lot more) for the specified volume. 529 */ 530int 531zvol_create_minor(const char *name) 532{ 533 zfs_soft_state_t *zs; 534 zvol_state_t *zv; 535 objset_t *os; 536 struct cdev *dev; 537 struct g_provider *pp; 538 struct g_geom *gp; 539 dmu_object_info_t doi; 540 uint64_t volsize, mode; 541 int error; 542 543 ZFS_LOG(1, "Creating ZVOL %s...", name); 544 545 mutex_enter(&spa_namespace_lock); 546 547 if (zvol_minor_lookup(name) != NULL) { 548 mutex_exit(&spa_namespace_lock); 549 return (SET_ERROR(EEXIST)); 550 } 551 552 /* lie and say we're read-only */ 553 error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); 554 555 if (error) { 556 mutex_exit(&spa_namespace_lock); 557 return (error); 558 } 559 560#ifdef sun 561 if ((minor = zfsdev_minor_alloc()) == 0) { 562 dmu_objset_disown(os, FTAG); 563 mutex_exit(&spa_namespace_lock); 564 return (SET_ERROR(ENXIO)); 565 } 566 567 if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { 568 dmu_objset_disown(os, FTAG); 569 mutex_exit(&spa_namespace_lock); 570 return (SET_ERROR(EAGAIN)); 571 } 572 (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 573 (char *)name); 574 575 (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); 576 577 if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 578 minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 579 ddi_soft_state_free(zfsdev_state, minor); 580 dmu_objset_disown(os, FTAG); 581 mutex_exit(&spa_namespace_lock); 582 return (SET_ERROR(EAGAIN)); 583 } 584 585 (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); 586 587 if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 588 minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 589 ddi_remove_minor_node(zfs_dip, chrbuf); 590 ddi_soft_state_free(zfsdev_state, minor); 591 dmu_objset_disown(os, FTAG); 592 mutex_exit(&spa_namespace_lock); 593 return (SET_ERROR(EAGAIN)); 594 } 595 596 zs = ddi_get_soft_state(zfsdev_state, minor); 597 zs->zss_type = ZSST_ZVOL; 598 zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); 599#else /* !sun */ 600 601 zv = kmem_zalloc(sizeof(*zv), KM_SLEEP); 602 zv->zv_state = 0; 603 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 604 if (error) { 605 kmem_free(zv, sizeof(*zv)); 606 dmu_objset_disown(os, zvol_tag); 607 mutex_exit(&spa_namespace_lock); 608 return (error); 609 } 610 error = dsl_prop_get_integer(name, 611 zfs_prop_to_name(ZFS_PROP_VOLMODE), &mode, NULL); 612 if (error != 0 || mode == ZFS_VOLMODE_DEFAULT) 613 mode = volmode; 614 615 DROP_GIANT(); 616 zv->zv_volsize = volsize; 617 zv->zv_volmode = mode; 618 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 619 g_topology_lock(); 620 gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name); 621 gp->start = zvol_geom_start; 622 gp->access = zvol_geom_access; 623 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name); 624 pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 625 pp->sectorsize = DEV_BSIZE; 626 pp->mediasize = zv->zv_volsize; 627 pp->private = zv; 628 629 zv->zv_provider = pp; 630 bioq_init(&zv->zv_queue); 631 mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF); 632 } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 633 if (make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 634 &dev, &zvol_cdevsw, NULL, UID_ROOT, GID_OPERATOR, 635 0640, "%s/%s", ZVOL_DRIVER, name) != 0) { 636 kmem_free(zv, sizeof(*zv)); 637 dmu_objset_disown(os, FTAG); 638 mutex_exit(&spa_namespace_lock); 639 return (SET_ERROR(ENXIO)); 640 } 641 zv->zv_dev = dev; 642 dev->si_iosize_max = MAXPHYS; 643 dev->si_drv2 = zv; 644 } 645 LIST_INSERT_HEAD(&all_zvols, zv, zv_links); 646#endif /* !sun */ 647 648 (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 649 zv->zv_min_bs = DEV_BSHIFT; 650 zv->zv_objset = os; 651 if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) 652 zv->zv_flags |= ZVOL_RDONLY; 653 mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 654 avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 655 sizeof (rl_t), offsetof(rl_t, r_node)); 656 list_create(&zv->zv_extents, sizeof (zvol_extent_t), 657 offsetof(zvol_extent_t, ze_node)); 658 /* get and cache the blocksize */ 659 error = dmu_object_info(os, ZVOL_OBJ, &doi); 660 ASSERT(error == 0); 661 zv->zv_volblocksize = doi.doi_data_block_size; 662 663 if (spa_writeable(dmu_objset_spa(os))) { 664 if (zil_replay_disable) 665 zil_destroy(dmu_objset_zil(os), B_FALSE); 666 else 667 zil_replay(os, zv, zvol_replay_vector); 668 } 669 dmu_objset_disown(os, FTAG); 670 zv->zv_objset = NULL; 671 672 zvol_minors++; 673 674 mutex_exit(&spa_namespace_lock); 675 676#ifndef sun 677 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 678 zvol_geom_run(zv); 679 g_topology_unlock(); 680 } 681 PICKUP_GIANT(); 682#endif 683 684 ZFS_LOG(1, "ZVOL %s created.", name); 685 686 return (0); 687} 688 689/* 690 * Remove minor node for the specified volume. 691 */ 692static int 693zvol_remove_zv(zvol_state_t *zv) 694{ 695#ifdef sun 696 minor_t minor = zv->zv_minor; 697#endif 698 699 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 700 if (zv->zv_total_opens != 0) 701 return (SET_ERROR(EBUSY)); 702 703 ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); 704 705#ifdef sun 706 (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor); 707 ddi_remove_minor_node(zfs_dip, nmbuf); 708#else 709 LIST_REMOVE(zv, zv_links); 710 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 711 g_topology_lock(); 712 zvol_geom_destroy(zv); 713 g_topology_unlock(); 714 } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) 715 destroy_dev(zv->zv_dev); 716#endif /* sun */ 717 718 avl_destroy(&zv->zv_znode.z_range_avl); 719 mutex_destroy(&zv->zv_znode.z_range_lock); 720 721 kmem_free(zv, sizeof(*zv)); 722 723 zvol_minors--; 724 return (0); 725} 726 727int 728zvol_remove_minor(const char *name) 729{ 730 zvol_state_t *zv; 731 int rc; 732 733 mutex_enter(&spa_namespace_lock); 734 if ((zv = zvol_minor_lookup(name)) == NULL) { 735 mutex_exit(&spa_namespace_lock); 736 return (SET_ERROR(ENXIO)); 737 } 738 rc = zvol_remove_zv(zv); 739 mutex_exit(&spa_namespace_lock); 740 return (rc); 741} 742 743int 744zvol_first_open(zvol_state_t *zv) 745{ 746 objset_t *os; 747 uint64_t volsize; 748 int error; 749 uint64_t readonly; 750 751 /* lie and say we're read-only */ 752 error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, 753 zvol_tag, &os); 754 if (error) 755 return (error); 756 757 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 758 if (error) { 759 ASSERT(error == 0); 760 dmu_objset_disown(os, zvol_tag); 761 return (error); 762 } 763 zv->zv_objset = os; 764 error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); 765 if (error) { 766 dmu_objset_disown(os, zvol_tag); 767 return (error); 768 } 769 zv->zv_volsize = volsize; 770 zv->zv_zilog = zil_open(os, zvol_get_data); 771 zvol_size_changed(zv); 772 773 VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly, 774 NULL) == 0); 775 if (readonly || dmu_objset_is_snapshot(os) || 776 !spa_writeable(dmu_objset_spa(os))) 777 zv->zv_flags |= ZVOL_RDONLY; 778 else 779 zv->zv_flags &= ~ZVOL_RDONLY; 780 return (error); 781} 782 783void 784zvol_last_close(zvol_state_t *zv) 785{ 786 zil_close(zv->zv_zilog); 787 zv->zv_zilog = NULL; 788 789 dmu_buf_rele(zv->zv_dbuf, zvol_tag); 790 zv->zv_dbuf = NULL; 791 792 /* 793 * Evict cached data 794 */ 795 if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && 796 !(zv->zv_flags & ZVOL_RDONLY)) 797 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 798 dmu_objset_evict_dbufs(zv->zv_objset); 799 800 dmu_objset_disown(zv->zv_objset, zvol_tag); 801 zv->zv_objset = NULL; 802} 803 804#ifdef sun 805int 806zvol_prealloc(zvol_state_t *zv) 807{ 808 objset_t *os = zv->zv_objset; 809 dmu_tx_t *tx; 810 uint64_t refd, avail, usedobjs, availobjs; 811 uint64_t resid = zv->zv_volsize; 812 uint64_t off = 0; 813 814 /* Check the space usage before attempting to allocate the space */ 815 dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); 816 if (avail < zv->zv_volsize) 817 return (SET_ERROR(ENOSPC)); 818 819 /* Free old extents if they exist */ 820 zvol_free_extents(zv); 821 822 while (resid != 0) { 823 int error; 824 uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); 825 826 tx = dmu_tx_create(os); 827 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 828 error = dmu_tx_assign(tx, TXG_WAIT); 829 if (error) { 830 dmu_tx_abort(tx); 831 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); 832 return (error); 833 } 834 dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); 835 dmu_tx_commit(tx); 836 off += bytes; 837 resid -= bytes; 838 } 839 txg_wait_synced(dmu_objset_pool(os), 0); 840 841 return (0); 842} 843#endif /* sun */ 844 845static int 846zvol_update_volsize(objset_t *os, uint64_t volsize) 847{ 848 dmu_tx_t *tx; 849 int error; 850 851 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 852 853 tx = dmu_tx_create(os); 854 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 855 dmu_tx_mark_netfree(tx); 856 error = dmu_tx_assign(tx, TXG_WAIT); 857 if (error) { 858 dmu_tx_abort(tx); 859 return (error); 860 } 861 862 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, 863 &volsize, tx); 864 dmu_tx_commit(tx); 865 866 if (error == 0) 867 error = dmu_free_long_range(os, 868 ZVOL_OBJ, volsize, DMU_OBJECT_END); 869 return (error); 870} 871 872void 873zvol_remove_minors(const char *name) 874{ 875 zvol_state_t *zv, *tzv; 876 size_t namelen; 877 878 namelen = strlen(name); 879 880 DROP_GIANT(); 881 mutex_enter(&spa_namespace_lock); 882 883 LIST_FOREACH_SAFE(zv, &all_zvols, zv_links, tzv) { 884 if (strcmp(zv->zv_name, name) == 0 || 885 (strncmp(zv->zv_name, name, namelen) == 0 && 886 strlen(zv->zv_name) > namelen && (zv->zv_name[namelen] == '/' || 887 zv->zv_name[namelen] == '@'))) { 888 (void) zvol_remove_zv(zv); 889 } 890 } 891 892 mutex_exit(&spa_namespace_lock); 893 PICKUP_GIANT(); 894} 895 896int 897zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) 898{ 899 zvol_state_t *zv = NULL; 900 objset_t *os; 901 int error; 902 dmu_object_info_t doi; 903 uint64_t old_volsize = 0ULL; 904 uint64_t readonly; 905 906 mutex_enter(&spa_namespace_lock); 907 zv = zvol_minor_lookup(name); 908 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 909 mutex_exit(&spa_namespace_lock); 910 return (error); 911 } 912 913 if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || 914 (error = zvol_check_volsize(volsize, 915 doi.doi_data_block_size)) != 0) 916 goto out; 917 918 VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, 919 NULL) == 0); 920 if (readonly) { 921 error = EROFS; 922 goto out; 923 } 924 925 error = zvol_update_volsize(os, volsize); 926 /* 927 * Reinitialize the dump area to the new size. If we 928 * failed to resize the dump area then restore it back to 929 * its original size. 930 */ 931 if (zv && error == 0) { 932#ifdef ZVOL_DUMP 933 if (zv->zv_flags & ZVOL_DUMPIFIED) { 934 old_volsize = zv->zv_volsize; 935 zv->zv_volsize = volsize; 936 if ((error = zvol_dumpify(zv)) != 0 || 937 (error = dumpvp_resize()) != 0) { 938 (void) zvol_update_volsize(os, old_volsize); 939 zv->zv_volsize = old_volsize; 940 error = zvol_dumpify(zv); 941 } 942 } 943#endif /* ZVOL_DUMP */ 944 if (error == 0) { 945 zv->zv_volsize = volsize; 946 zvol_size_changed(zv); 947 } 948 } 949 950#ifdef sun 951 /* 952 * Generate a LUN expansion event. 953 */ 954 if (zv && error == 0) { 955 sysevent_id_t eid; 956 nvlist_t *attr; 957 char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 958 959 (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV, 960 zv->zv_minor); 961 962 VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 963 VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 964 965 (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 966 ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 967 968 nvlist_free(attr); 969 kmem_free(physpath, MAXPATHLEN); 970 } 971#endif /* sun */ 972 973out: 974 dmu_objset_rele(os, FTAG); 975 976 mutex_exit(&spa_namespace_lock); 977 978 return (error); 979} 980 981/*ARGSUSED*/ 982static int 983zvol_open(struct g_provider *pp, int flag, int count) 984{ 985 zvol_state_t *zv; 986 int err = 0; 987 boolean_t locked = B_FALSE; 988 989 /* 990 * Protect against recursively entering spa_namespace_lock 991 * when spa_open() is used for a pool on a (local) ZVOL(s). 992 * This is needed since we replaced upstream zfsdev_state_lock 993 * with spa_namespace_lock in the ZVOL code. 994 * We are using the same trick as spa_open(). 995 * Note that calls in zvol_first_open which need to resolve 996 * pool name to a spa object will enter spa_open() 997 * recursively, but that function already has all the 998 * necessary protection. 999 */ 1000 if (!MUTEX_HELD(&spa_namespace_lock)) { 1001 mutex_enter(&spa_namespace_lock); 1002 locked = B_TRUE; 1003 } 1004 1005 zv = pp->private; 1006 if (zv == NULL) { 1007 if (locked) 1008 mutex_exit(&spa_namespace_lock); 1009 return (SET_ERROR(ENXIO)); 1010 } 1011 1012 if (zv->zv_total_opens == 0) { 1013 err = zvol_first_open(zv); 1014 if (err) { 1015 if (locked) 1016 mutex_exit(&spa_namespace_lock); 1017 return (err); 1018 } 1019 pp->mediasize = zv->zv_volsize; 1020 pp->stripeoffset = 0; 1021 pp->stripesize = zv->zv_volblocksize; 1022 } 1023 if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 1024 err = SET_ERROR(EROFS); 1025 goto out; 1026 } 1027 if (zv->zv_flags & ZVOL_EXCL) { 1028 err = SET_ERROR(EBUSY); 1029 goto out; 1030 } 1031#ifdef FEXCL 1032 if (flag & FEXCL) { 1033 if (zv->zv_total_opens != 0) { 1034 err = SET_ERROR(EBUSY); 1035 goto out; 1036 } 1037 zv->zv_flags |= ZVOL_EXCL; 1038 } 1039#endif 1040 1041 zv->zv_total_opens += count; 1042 if (locked) 1043 mutex_exit(&spa_namespace_lock); 1044 1045 return (err); 1046out: 1047 if (zv->zv_total_opens == 0) 1048 zvol_last_close(zv); 1049 if (locked) 1050 mutex_exit(&spa_namespace_lock); 1051 return (err); 1052} 1053 1054/*ARGSUSED*/ 1055static int 1056zvol_close(struct g_provider *pp, int flag, int count) 1057{ 1058 zvol_state_t *zv; 1059 int error = 0; 1060 boolean_t locked = B_FALSE; 1061 1062 /* See comment in zvol_open(). */ 1063 if (!MUTEX_HELD(&spa_namespace_lock)) { 1064 mutex_enter(&spa_namespace_lock); 1065 locked = B_TRUE; 1066 } 1067 1068 zv = pp->private; 1069 if (zv == NULL) { 1070 if (locked) 1071 mutex_exit(&spa_namespace_lock); 1072 return (SET_ERROR(ENXIO)); 1073 } 1074 1075 if (zv->zv_flags & ZVOL_EXCL) { 1076 ASSERT(zv->zv_total_opens == 1); 1077 zv->zv_flags &= ~ZVOL_EXCL; 1078 } 1079 1080 /* 1081 * If the open count is zero, this is a spurious close. 1082 * That indicates a bug in the kernel / DDI framework. 1083 */ 1084 ASSERT(zv->zv_total_opens != 0); 1085 1086 /* 1087 * You may get multiple opens, but only one close. 1088 */ 1089 zv->zv_total_opens -= count; 1090 1091 if (zv->zv_total_opens == 0) 1092 zvol_last_close(zv); 1093 1094 if (locked) 1095 mutex_exit(&spa_namespace_lock); 1096 return (error); 1097} 1098 1099static void 1100zvol_get_done(zgd_t *zgd, int error) 1101{ 1102 if (zgd->zgd_db) 1103 dmu_buf_rele(zgd->zgd_db, zgd); 1104 1105 zfs_range_unlock(zgd->zgd_rl); 1106 1107 if (error == 0 && zgd->zgd_bp) 1108 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1109 1110 kmem_free(zgd, sizeof (zgd_t)); 1111} 1112 1113/* 1114 * Get data to generate a TX_WRITE intent log record. 1115 */ 1116static int 1117zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1118{ 1119 zvol_state_t *zv = arg; 1120 objset_t *os = zv->zv_objset; 1121 uint64_t object = ZVOL_OBJ; 1122 uint64_t offset = lr->lr_offset; 1123 uint64_t size = lr->lr_length; /* length of user data */ 1124 blkptr_t *bp = &lr->lr_blkptr; 1125 dmu_buf_t *db; 1126 zgd_t *zgd; 1127 int error; 1128 1129 ASSERT(zio != NULL); 1130 ASSERT(size != 0); 1131 1132 zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1133 zgd->zgd_zilog = zv->zv_zilog; 1134 zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); 1135 1136 /* 1137 * Write records come in two flavors: immediate and indirect. 1138 * For small writes it's cheaper to store the data with the 1139 * log record (immediate); for large writes it's cheaper to 1140 * sync the data and get a pointer to it (indirect) so that 1141 * we don't have to write the data twice. 1142 */ 1143 if (buf != NULL) { /* immediate write */ 1144 error = dmu_read(os, object, offset, size, buf, 1145 DMU_READ_NO_PREFETCH); 1146 } else { 1147 size = zv->zv_volblocksize; 1148 offset = P2ALIGN(offset, size); 1149 error = dmu_buf_hold(os, object, offset, zgd, &db, 1150 DMU_READ_NO_PREFETCH); 1151 if (error == 0) { 1152 blkptr_t *obp = dmu_buf_get_blkptr(db); 1153 if (obp) { 1154 ASSERT(BP_IS_HOLE(bp)); 1155 *bp = *obp; 1156 } 1157 1158 zgd->zgd_db = db; 1159 zgd->zgd_bp = bp; 1160 1161 ASSERT(db->db_offset == offset); 1162 ASSERT(db->db_size == size); 1163 1164 error = dmu_sync(zio, lr->lr_common.lrc_txg, 1165 zvol_get_done, zgd); 1166 1167 if (error == 0) 1168 return (0); 1169 } 1170 } 1171 1172 zvol_get_done(zgd, error); 1173 1174 return (error); 1175} 1176 1177/* 1178 * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. 1179 * 1180 * We store data in the log buffers if it's small enough. 1181 * Otherwise we will later flush the data out via dmu_sync(). 1182 */ 1183ssize_t zvol_immediate_write_sz = 32768; 1184 1185static void 1186zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, 1187 boolean_t sync) 1188{ 1189 uint32_t blocksize = zv->zv_volblocksize; 1190 zilog_t *zilog = zv->zv_zilog; 1191 boolean_t slogging; 1192 ssize_t immediate_write_sz; 1193 1194 if (zil_replaying(zilog, tx)) 1195 return; 1196 1197 immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) 1198 ? 0 : zvol_immediate_write_sz; 1199 1200 slogging = spa_has_slogs(zilog->zl_spa) && 1201 (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); 1202 1203 while (resid) { 1204 itx_t *itx; 1205 lr_write_t *lr; 1206 ssize_t len; 1207 itx_wr_state_t write_state; 1208 1209 /* 1210 * Unlike zfs_log_write() we can be called with 1211 * upto DMU_MAX_ACCESS/2 (5MB) writes. 1212 */ 1213 if (blocksize > immediate_write_sz && !slogging && 1214 resid >= blocksize && off % blocksize == 0) { 1215 write_state = WR_INDIRECT; /* uses dmu_sync */ 1216 len = blocksize; 1217 } else if (sync) { 1218 write_state = WR_COPIED; 1219 len = MIN(ZIL_MAX_LOG_DATA, resid); 1220 } else { 1221 write_state = WR_NEED_COPY; 1222 len = MIN(ZIL_MAX_LOG_DATA, resid); 1223 } 1224 1225 itx = zil_itx_create(TX_WRITE, sizeof (*lr) + 1226 (write_state == WR_COPIED ? len : 0)); 1227 lr = (lr_write_t *)&itx->itx_lr; 1228 if (write_state == WR_COPIED && dmu_read(zv->zv_objset, 1229 ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { 1230 zil_itx_destroy(itx); 1231 itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1232 lr = (lr_write_t *)&itx->itx_lr; 1233 write_state = WR_NEED_COPY; 1234 } 1235 1236 itx->itx_wr_state = write_state; 1237 if (write_state == WR_NEED_COPY) 1238 itx->itx_sod += len; 1239 lr->lr_foid = ZVOL_OBJ; 1240 lr->lr_offset = off; 1241 lr->lr_length = len; 1242 lr->lr_blkoff = 0; 1243 BP_ZERO(&lr->lr_blkptr); 1244 1245 itx->itx_private = zv; 1246 itx->itx_sync = sync; 1247 1248 zil_itx_assign(zilog, itx, tx); 1249 1250 off += len; 1251 resid -= len; 1252 } 1253} 1254 1255#ifdef sun 1256static int 1257zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, 1258 uint64_t size, boolean_t doread, boolean_t isdump) 1259{ 1260 vdev_disk_t *dvd; 1261 int c; 1262 int numerrors = 0; 1263 1264 if (vd->vdev_ops == &vdev_mirror_ops || 1265 vd->vdev_ops == &vdev_replacing_ops || 1266 vd->vdev_ops == &vdev_spare_ops) { 1267 for (c = 0; c < vd->vdev_children; c++) { 1268 int err = zvol_dumpio_vdev(vd->vdev_child[c], 1269 addr, offset, origoffset, size, doread, isdump); 1270 if (err != 0) { 1271 numerrors++; 1272 } else if (doread) { 1273 break; 1274 } 1275 } 1276 } 1277 1278 if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops) 1279 return (numerrors < vd->vdev_children ? 0 : EIO); 1280 1281 if (doread && !vdev_readable(vd)) 1282 return (SET_ERROR(EIO)); 1283 else if (!doread && !vdev_writeable(vd)) 1284 return (SET_ERROR(EIO)); 1285 1286 if (vd->vdev_ops == &vdev_raidz_ops) { 1287 return (vdev_raidz_physio(vd, 1288 addr, size, offset, origoffset, doread, isdump)); 1289 } 1290 1291 offset += VDEV_LABEL_START_SIZE; 1292 1293 if (ddi_in_panic() || isdump) { 1294 ASSERT(!doread); 1295 if (doread) 1296 return (SET_ERROR(EIO)); 1297 dvd = vd->vdev_tsd; 1298 ASSERT3P(dvd, !=, NULL); 1299 return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 1300 lbtodb(size))); 1301 } else { 1302 dvd = vd->vdev_tsd; 1303 ASSERT3P(dvd, !=, NULL); 1304 return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size, 1305 offset, doread ? B_READ : B_WRITE)); 1306 } 1307} 1308 1309static int 1310zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, 1311 boolean_t doread, boolean_t isdump) 1312{ 1313 vdev_t *vd; 1314 int error; 1315 zvol_extent_t *ze; 1316 spa_t *spa = dmu_objset_spa(zv->zv_objset); 1317 1318 /* Must be sector aligned, and not stradle a block boundary. */ 1319 if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || 1320 P2BOUNDARY(offset, size, zv->zv_volblocksize)) { 1321 return (SET_ERROR(EINVAL)); 1322 } 1323 ASSERT(size <= zv->zv_volblocksize); 1324 1325 /* Locate the extent this belongs to */ 1326 ze = list_head(&zv->zv_extents); 1327 while (offset >= ze->ze_nblks * zv->zv_volblocksize) { 1328 offset -= ze->ze_nblks * zv->zv_volblocksize; 1329 ze = list_next(&zv->zv_extents, ze); 1330 } 1331 1332 if (ze == NULL) 1333 return (SET_ERROR(EINVAL)); 1334 1335 if (!ddi_in_panic()) 1336 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 1337 1338 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); 1339 offset += DVA_GET_OFFSET(&ze->ze_dva); 1340 error = zvol_dumpio_vdev(vd, addr, offset, DVA_GET_OFFSET(&ze->ze_dva), 1341 size, doread, isdump); 1342 1343 if (!ddi_in_panic()) 1344 spa_config_exit(spa, SCL_STATE, FTAG); 1345 1346 return (error); 1347} 1348#endif /* sun */ 1349 1350void 1351zvol_strategy(struct bio *bp) 1352{ 1353 zvol_state_t *zv; 1354 uint64_t off, volsize; 1355 size_t resid; 1356 char *addr; 1357 objset_t *os; 1358 rl_t *rl; 1359 int error = 0; 1360 boolean_t doread = 0; 1361 boolean_t is_dumpified; 1362 boolean_t sync; 1363 1364 if (bp->bio_to) 1365 zv = bp->bio_to->private; 1366 else 1367 zv = bp->bio_dev->si_drv2; 1368 1369 if (zv == NULL) { 1370 error = ENXIO; 1371 goto out; 1372 } 1373 1374 if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) { 1375 error = EROFS; 1376 goto out; 1377 } 1378 1379 switch (bp->bio_cmd) { 1380 case BIO_FLUSH: 1381 goto sync; 1382 case BIO_READ: 1383 doread = 1; 1384 case BIO_WRITE: 1385 case BIO_DELETE: 1386 break; 1387 default: 1388 error = EOPNOTSUPP; 1389 goto out; 1390 } 1391 1392 off = bp->bio_offset; 1393 volsize = zv->zv_volsize; 1394 1395 os = zv->zv_objset; 1396 ASSERT(os != NULL); 1397 1398 addr = bp->bio_data; 1399 resid = bp->bio_length; 1400 1401 if (resid > 0 && (off < 0 || off >= volsize)) { 1402 error = EIO; 1403 goto out; 1404 } 1405 1406#ifdef illumos 1407 is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED; 1408#else 1409 is_dumpified = B_FALSE; 1410#endif 1411 sync = !doread && !is_dumpified && 1412 zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; 1413 1414 /* 1415 * There must be no buffer changes when doing a dmu_sync() because 1416 * we can't change the data whilst calculating the checksum. 1417 */ 1418 rl = zfs_range_lock(&zv->zv_znode, off, resid, 1419 doread ? RL_READER : RL_WRITER); 1420 1421 if (bp->bio_cmd == BIO_DELETE) { 1422 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1423 error = dmu_tx_assign(tx, TXG_WAIT); 1424 if (error != 0) { 1425 dmu_tx_abort(tx); 1426 } else { 1427 zvol_log_truncate(zv, tx, off, resid, B_TRUE); 1428 dmu_tx_commit(tx); 1429 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 1430 off, resid); 1431 resid = 0; 1432 } 1433 goto unlock; 1434 } 1435 1436 while (resid != 0 && off < volsize) { 1437 size_t size = MIN(resid, zvol_maxphys); 1438#ifdef illumos 1439 if (is_dumpified) { 1440 size = MIN(size, P2END(off, zv->zv_volblocksize) - off); 1441 error = zvol_dumpio(zv, addr, off, size, 1442 doread, B_FALSE); 1443 } else if (doread) { 1444#else 1445 if (doread) { 1446#endif 1447 error = dmu_read(os, ZVOL_OBJ, off, size, addr, 1448 DMU_READ_PREFETCH); 1449 } else { 1450 dmu_tx_t *tx = dmu_tx_create(os); 1451 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 1452 error = dmu_tx_assign(tx, TXG_WAIT); 1453 if (error) { 1454 dmu_tx_abort(tx); 1455 } else { 1456 dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 1457 zvol_log_write(zv, tx, off, size, sync); 1458 dmu_tx_commit(tx); 1459 } 1460 } 1461 if (error) { 1462 /* convert checksum errors into IO errors */ 1463 if (error == ECKSUM) 1464 error = SET_ERROR(EIO); 1465 break; 1466 } 1467 off += size; 1468 addr += size; 1469 resid -= size; 1470 } 1471unlock: 1472 zfs_range_unlock(rl); 1473 1474 bp->bio_completed = bp->bio_length - resid; 1475 if (bp->bio_completed < bp->bio_length && off > volsize) 1476 error = EINVAL; 1477 1478 if (sync) { 1479sync: 1480 zil_commit(zv->zv_zilog, ZVOL_OBJ); 1481 } 1482out: 1483 if (bp->bio_to) 1484 g_io_deliver(bp, error); 1485 else 1486 biofinish(bp, NULL, error); 1487} 1488 1489#ifdef sun 1490/* 1491 * Set the buffer count to the zvol maximum transfer. 1492 * Using our own routine instead of the default minphys() 1493 * means that for larger writes we write bigger buffers on X86 1494 * (128K instead of 56K) and flush the disk write cache less often 1495 * (every zvol_maxphys - currently 1MB) instead of minphys (currently 1496 * 56K on X86 and 128K on sparc). 1497 */ 1498void 1499zvol_minphys(struct buf *bp) 1500{ 1501 if (bp->b_bcount > zvol_maxphys) 1502 bp->b_bcount = zvol_maxphys; 1503} 1504 1505int 1506zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) 1507{ 1508 minor_t minor = getminor(dev); 1509 zvol_state_t *zv; 1510 int error = 0; 1511 uint64_t size; 1512 uint64_t boff; 1513 uint64_t resid; 1514 1515 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1516 if (zv == NULL) 1517 return (SET_ERROR(ENXIO)); 1518 1519 if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0) 1520 return (SET_ERROR(EINVAL)); 1521 1522 boff = ldbtob(blkno); 1523 resid = ldbtob(nblocks); 1524 1525 VERIFY3U(boff + resid, <=, zv->zv_volsize); 1526 1527 while (resid) { 1528 size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); 1529 error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE); 1530 if (error) 1531 break; 1532 boff += size; 1533 addr += size; 1534 resid -= size; 1535 } 1536 1537 return (error); 1538} 1539 1540/*ARGSUSED*/ 1541int 1542zvol_read(dev_t dev, uio_t *uio, cred_t *cr) 1543{ 1544 minor_t minor = getminor(dev); 1545#else 1546int 1547zvol_read(struct cdev *dev, struct uio *uio, int ioflag) 1548{ 1549#endif 1550 zvol_state_t *zv; 1551 uint64_t volsize; 1552 rl_t *rl; 1553 int error = 0; 1554 1555#ifdef sun 1556 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1557 if (zv == NULL) 1558 return (SET_ERROR(ENXIO)); 1559#else 1560 zv = dev->si_drv2; 1561#endif 1562 1563 volsize = zv->zv_volsize; 1564 if (uio->uio_resid > 0 && 1565 (uio->uio_loffset < 0 || uio->uio_loffset > volsize)) 1566 return (SET_ERROR(EIO)); 1567 1568#ifdef illumos 1569 if (zv->zv_flags & ZVOL_DUMPIFIED) { 1570 error = physio(zvol_strategy, NULL, dev, B_READ, 1571 zvol_minphys, uio); 1572 return (error); 1573 } 1574#endif 1575 1576 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1577 RL_READER); 1578 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1579 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1580 1581 /* don't read past the end */ 1582 if (bytes > volsize - uio->uio_loffset) 1583 bytes = volsize - uio->uio_loffset; 1584 1585 error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes); 1586 if (error) { 1587 /* convert checksum errors into IO errors */ 1588 if (error == ECKSUM) 1589 error = SET_ERROR(EIO); 1590 break; 1591 } 1592 } 1593 zfs_range_unlock(rl); 1594 return (error); 1595} 1596 1597#ifdef sun 1598/*ARGSUSED*/ 1599int 1600zvol_write(dev_t dev, uio_t *uio, cred_t *cr) 1601{ 1602 minor_t minor = getminor(dev); 1603#else 1604int 1605zvol_write(struct cdev *dev, struct uio *uio, int ioflag) 1606{ 1607#endif 1608 zvol_state_t *zv; 1609 uint64_t volsize; 1610 rl_t *rl; 1611 int error = 0; 1612 boolean_t sync; 1613 1614#ifdef sun 1615 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1616 if (zv == NULL) 1617 return (SET_ERROR(ENXIO)); 1618#else 1619 zv = dev->si_drv2; 1620#endif 1621 1622 volsize = zv->zv_volsize; 1623 if (uio->uio_resid > 0 && 1624 (uio->uio_loffset < 0 || uio->uio_loffset > volsize)) 1625 return (SET_ERROR(EIO)); 1626 1627#ifdef illumos 1628 if (zv->zv_flags & ZVOL_DUMPIFIED) { 1629 error = physio(zvol_strategy, NULL, dev, B_WRITE, 1630 zvol_minphys, uio); 1631 return (error); 1632 } 1633#endif 1634 1635#ifdef sun 1636 sync = !(zv->zv_flags & ZVOL_WCE) || 1637#else 1638 sync = (ioflag & IO_SYNC) || 1639#endif 1640 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 1641 1642 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1643 RL_WRITER); 1644 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1645 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1646 uint64_t off = uio->uio_loffset; 1647 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1648 1649 if (bytes > volsize - off) /* don't write past the end */ 1650 bytes = volsize - off; 1651 1652 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 1653 error = dmu_tx_assign(tx, TXG_WAIT); 1654 if (error) { 1655 dmu_tx_abort(tx); 1656 break; 1657 } 1658 error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx); 1659 if (error == 0) 1660 zvol_log_write(zv, tx, off, bytes, sync); 1661 dmu_tx_commit(tx); 1662 1663 if (error) 1664 break; 1665 } 1666 zfs_range_unlock(rl); 1667 if (sync) 1668 zil_commit(zv->zv_zilog, ZVOL_OBJ); 1669 return (error); 1670} 1671 1672#ifdef sun 1673int 1674zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) 1675{ 1676 struct uuid uuid = EFI_RESERVED; 1677 efi_gpe_t gpe = { 0 }; 1678 uint32_t crc; 1679 dk_efi_t efi; 1680 int length; 1681 char *ptr; 1682 1683 if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) 1684 return (SET_ERROR(EFAULT)); 1685 ptr = (char *)(uintptr_t)efi.dki_data_64; 1686 length = efi.dki_length; 1687 /* 1688 * Some clients may attempt to request a PMBR for the 1689 * zvol. Currently this interface will return EINVAL to 1690 * such requests. These requests could be supported by 1691 * adding a check for lba == 0 and consing up an appropriate 1692 * PMBR. 1693 */ 1694 if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) 1695 return (SET_ERROR(EINVAL)); 1696 1697 gpe.efi_gpe_StartingLBA = LE_64(34ULL); 1698 gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); 1699 UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); 1700 1701 if (efi.dki_lba == 1) { 1702 efi_gpt_t gpt = { 0 }; 1703 1704 gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1705 gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 1706 gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); 1707 gpt.efi_gpt_MyLBA = LE_64(1ULL); 1708 gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); 1709 gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1); 1710 gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1711 gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); 1712 gpt.efi_gpt_SizeOfPartitionEntry = 1713 LE_32(sizeof (efi_gpe_t)); 1714 CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); 1715 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 1716 CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); 1717 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); 1718 if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), 1719 flag)) 1720 return (SET_ERROR(EFAULT)); 1721 ptr += sizeof (gpt); 1722 length -= sizeof (gpt); 1723 } 1724 if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), 1725 length), flag)) 1726 return (SET_ERROR(EFAULT)); 1727 return (0); 1728} 1729 1730/* 1731 * BEGIN entry points to allow external callers access to the volume. 1732 */ 1733/* 1734 * Return the volume parameters needed for access from an external caller. 1735 * These values are invariant as long as the volume is held open. 1736 */ 1737int 1738zvol_get_volume_params(minor_t minor, uint64_t *blksize, 1739 uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl, 1740 void **rl_hdl, void **bonus_hdl) 1741{ 1742 zvol_state_t *zv; 1743 1744 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1745 if (zv == NULL) 1746 return (SET_ERROR(ENXIO)); 1747 if (zv->zv_flags & ZVOL_DUMPIFIED) 1748 return (SET_ERROR(ENXIO)); 1749 1750 ASSERT(blksize && max_xfer_len && minor_hdl && 1751 objset_hdl && zil_hdl && rl_hdl && bonus_hdl); 1752 1753 *blksize = zv->zv_volblocksize; 1754 *max_xfer_len = (uint64_t)zvol_maxphys; 1755 *minor_hdl = zv; 1756 *objset_hdl = zv->zv_objset; 1757 *zil_hdl = zv->zv_zilog; 1758 *rl_hdl = &zv->zv_znode; 1759 *bonus_hdl = zv->zv_dbuf; 1760 return (0); 1761} 1762 1763/* 1764 * Return the current volume size to an external caller. 1765 * The size can change while the volume is open. 1766 */ 1767uint64_t 1768zvol_get_volume_size(void *minor_hdl) 1769{ 1770 zvol_state_t *zv = minor_hdl; 1771 1772 return (zv->zv_volsize); 1773} 1774 1775/* 1776 * Return the current WCE setting to an external caller. 1777 * The WCE setting can change while the volume is open. 1778 */ 1779int 1780zvol_get_volume_wce(void *minor_hdl) 1781{ 1782 zvol_state_t *zv = minor_hdl; 1783 1784 return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0); 1785} 1786 1787/* 1788 * Entry point for external callers to zvol_log_write 1789 */ 1790void 1791zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid, 1792 boolean_t sync) 1793{ 1794 zvol_state_t *zv = minor_hdl; 1795 1796 zvol_log_write(zv, tx, off, resid, sync); 1797} 1798/* 1799 * END entry points to allow external callers access to the volume. 1800 */ 1801#endif /* sun */ 1802 1803/* 1804 * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE. 1805 */ 1806static void 1807zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len, 1808 boolean_t sync) 1809{ 1810 itx_t *itx; 1811 lr_truncate_t *lr; 1812 zilog_t *zilog = zv->zv_zilog; 1813 1814 if (zil_replaying(zilog, tx)) 1815 return; 1816 1817 itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1818 lr = (lr_truncate_t *)&itx->itx_lr; 1819 lr->lr_foid = ZVOL_OBJ; 1820 lr->lr_offset = off; 1821 lr->lr_length = len; 1822 1823 itx->itx_sync = sync; 1824 zil_itx_assign(zilog, itx, tx); 1825} 1826 1827#ifdef sun 1828/* 1829 * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). 1830 * Also a dirtbag dkio ioctl for unmap/free-block functionality. 1831 */ 1832/*ARGSUSED*/ 1833int 1834zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 1835{ 1836 zvol_state_t *zv; 1837 struct dk_callback *dkc; 1838 int error = 0; 1839 rl_t *rl; 1840 1841 mutex_enter(&spa_namespace_lock); 1842 1843 zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL); 1844 1845 if (zv == NULL) { 1846 mutex_exit(&spa_namespace_lock); 1847 return (SET_ERROR(ENXIO)); 1848 } 1849 ASSERT(zv->zv_total_opens > 0); 1850 1851 switch (cmd) { 1852 1853 case DKIOCINFO: 1854 { 1855 struct dk_cinfo dki; 1856 1857 bzero(&dki, sizeof (dki)); 1858 (void) strcpy(dki.dki_cname, "zvol"); 1859 (void) strcpy(dki.dki_dname, "zvol"); 1860 dki.dki_ctype = DKC_UNKNOWN; 1861 dki.dki_unit = getminor(dev); 1862 dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); 1863 mutex_exit(&spa_namespace_lock); 1864 if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) 1865 error = SET_ERROR(EFAULT); 1866 return (error); 1867 } 1868 1869 case DKIOCGMEDIAINFO: 1870 { 1871 struct dk_minfo dkm; 1872 1873 bzero(&dkm, sizeof (dkm)); 1874 dkm.dki_lbsize = 1U << zv->zv_min_bs; 1875 dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 1876 dkm.dki_media_type = DK_UNKNOWN; 1877 mutex_exit(&spa_namespace_lock); 1878 if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) 1879 error = SET_ERROR(EFAULT); 1880 return (error); 1881 } 1882 1883 case DKIOCGMEDIAINFOEXT: 1884 { 1885 struct dk_minfo_ext dkmext; 1886 1887 bzero(&dkmext, sizeof (dkmext)); 1888 dkmext.dki_lbsize = 1U << zv->zv_min_bs; 1889 dkmext.dki_pbsize = zv->zv_volblocksize; 1890 dkmext.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 1891 dkmext.dki_media_type = DK_UNKNOWN; 1892 mutex_exit(&spa_namespace_lock); 1893 if (ddi_copyout(&dkmext, (void *)arg, sizeof (dkmext), flag)) 1894 error = SET_ERROR(EFAULT); 1895 return (error); 1896 } 1897 1898 case DKIOCGETEFI: 1899 { 1900 uint64_t vs = zv->zv_volsize; 1901 uint8_t bs = zv->zv_min_bs; 1902 1903 mutex_exit(&spa_namespace_lock); 1904 error = zvol_getefi((void *)arg, flag, vs, bs); 1905 return (error); 1906 } 1907 1908 case DKIOCFLUSHWRITECACHE: 1909 dkc = (struct dk_callback *)arg; 1910 mutex_exit(&spa_namespace_lock); 1911 zil_commit(zv->zv_zilog, ZVOL_OBJ); 1912 if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { 1913 (*dkc->dkc_callback)(dkc->dkc_cookie, error); 1914 error = 0; 1915 } 1916 return (error); 1917 1918 case DKIOCGETWCE: 1919 { 1920 int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0; 1921 if (ddi_copyout(&wce, (void *)arg, sizeof (int), 1922 flag)) 1923 error = SET_ERROR(EFAULT); 1924 break; 1925 } 1926 case DKIOCSETWCE: 1927 { 1928 int wce; 1929 if (ddi_copyin((void *)arg, &wce, sizeof (int), 1930 flag)) { 1931 error = SET_ERROR(EFAULT); 1932 break; 1933 } 1934 if (wce) { 1935 zv->zv_flags |= ZVOL_WCE; 1936 mutex_exit(&spa_namespace_lock); 1937 } else { 1938 zv->zv_flags &= ~ZVOL_WCE; 1939 mutex_exit(&spa_namespace_lock); 1940 zil_commit(zv->zv_zilog, ZVOL_OBJ); 1941 } 1942 return (0); 1943 } 1944 1945 case DKIOCGGEOM: 1946 case DKIOCGVTOC: 1947 /* 1948 * commands using these (like prtvtoc) expect ENOTSUP 1949 * since we're emulating an EFI label 1950 */ 1951 error = SET_ERROR(ENOTSUP); 1952 break; 1953 1954 case DKIOCDUMPINIT: 1955 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1956 RL_WRITER); 1957 error = zvol_dumpify(zv); 1958 zfs_range_unlock(rl); 1959 break; 1960 1961 case DKIOCDUMPFINI: 1962 if (!(zv->zv_flags & ZVOL_DUMPIFIED)) 1963 break; 1964 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1965 RL_WRITER); 1966 error = zvol_dump_fini(zv); 1967 zfs_range_unlock(rl); 1968 break; 1969 1970 case DKIOCFREE: 1971 { 1972 dkioc_free_t df; 1973 dmu_tx_t *tx; 1974 1975 if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) { 1976 error = SET_ERROR(EFAULT); 1977 break; 1978 } 1979 1980 /* 1981 * Apply Postel's Law to length-checking. If they overshoot, 1982 * just blank out until the end, if there's a need to blank 1983 * out anything. 1984 */ 1985 if (df.df_start >= zv->zv_volsize) 1986 break; /* No need to do anything... */ 1987 if (df.df_start + df.df_length > zv->zv_volsize) 1988 df.df_length = DMU_OBJECT_END; 1989 1990 rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, 1991 RL_WRITER); 1992 tx = dmu_tx_create(zv->zv_objset); 1993 dmu_tx_mark_netfree(tx); 1994 error = dmu_tx_assign(tx, TXG_WAIT); 1995 if (error != 0) { 1996 dmu_tx_abort(tx); 1997 } else { 1998 zvol_log_truncate(zv, tx, df.df_start, 1999 df.df_length, B_TRUE); 2000 dmu_tx_commit(tx); 2001 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 2002 df.df_start, df.df_length); 2003 } 2004 2005 zfs_range_unlock(rl); 2006 2007 if (error == 0) { 2008 /* 2009 * If the write-cache is disabled or 'sync' property 2010 * is set to 'always' then treat this as a synchronous 2011 * operation (i.e. commit to zil). 2012 */ 2013 if (!(zv->zv_flags & ZVOL_WCE) || 2014 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) 2015 zil_commit(zv->zv_zilog, ZVOL_OBJ); 2016 2017 /* 2018 * If the caller really wants synchronous writes, and 2019 * can't wait for them, don't return until the write 2020 * is done. 2021 */ 2022 if (df.df_flags & DF_WAIT_SYNC) { 2023 txg_wait_synced( 2024 dmu_objset_pool(zv->zv_objset), 0); 2025 } 2026 } 2027 break; 2028 } 2029 2030 default: 2031 error = SET_ERROR(ENOTTY); 2032 break; 2033 2034 } 2035 mutex_exit(&spa_namespace_lock); 2036 return (error); 2037} 2038#endif /* sun */ 2039 2040int 2041zvol_busy(void) 2042{ 2043 return (zvol_minors != 0); 2044} 2045 2046void 2047zvol_init(void) 2048{ 2049 VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t), 2050 1) == 0); 2051 ZFS_LOG(1, "ZVOL Initialized."); 2052} 2053 2054void 2055zvol_fini(void) 2056{ 2057 ddi_soft_state_fini(&zfsdev_state); 2058 ZFS_LOG(1, "ZVOL Deinitialized."); 2059} 2060 2061#ifdef sun 2062/*ARGSUSED*/ 2063static int 2064zfs_mvdev_dump_feature_check(void *arg, dmu_tx_t *tx) 2065{ 2066 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 2067 2068 if (spa_feature_is_active(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP)) 2069 return (1); 2070 return (0); 2071} 2072 2073/*ARGSUSED*/ 2074static void 2075zfs_mvdev_dump_activate_feature_sync(void *arg, dmu_tx_t *tx) 2076{ 2077 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 2078 2079 spa_feature_incr(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, tx); 2080} 2081 2082static int 2083zvol_dump_init(zvol_state_t *zv, boolean_t resize) 2084{ 2085 dmu_tx_t *tx; 2086 int error; 2087 objset_t *os = zv->zv_objset; 2088 spa_t *spa = dmu_objset_spa(os); 2089 vdev_t *vd = spa->spa_root_vdev; 2090 nvlist_t *nv = NULL; 2091 uint64_t version = spa_version(spa); 2092 enum zio_checksum checksum; 2093 2094 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 2095 ASSERT(vd->vdev_ops == &vdev_root_ops); 2096 2097 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, 2098 DMU_OBJECT_END); 2099 /* wait for dmu_free_long_range to actually free the blocks */ 2100 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 2101 2102 /* 2103 * If the pool on which the dump device is being initialized has more 2104 * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is 2105 * enabled. If so, bump that feature's counter to indicate that the 2106 * feature is active. We also check the vdev type to handle the 2107 * following case: 2108 * # zpool create test raidz disk1 disk2 disk3 2109 * Now have spa_root_vdev->vdev_children == 1 (the raidz vdev), 2110 * the raidz vdev itself has 3 children. 2111 */ 2112 if (vd->vdev_children > 1 || vd->vdev_ops == &vdev_raidz_ops) { 2113 if (!spa_feature_is_enabled(spa, 2114 SPA_FEATURE_MULTI_VDEV_CRASH_DUMP)) 2115 return (SET_ERROR(ENOTSUP)); 2116 (void) dsl_sync_task(spa_name(spa), 2117 zfs_mvdev_dump_feature_check, 2118 zfs_mvdev_dump_activate_feature_sync, NULL, 2119 2, ZFS_SPACE_CHECK_RESERVED); 2120 } 2121 2122 tx = dmu_tx_create(os); 2123 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 2124 dmu_tx_hold_bonus(tx, ZVOL_OBJ); 2125 error = dmu_tx_assign(tx, TXG_WAIT); 2126 if (error) { 2127 dmu_tx_abort(tx); 2128 return (error); 2129 } 2130 2131 /* 2132 * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum 2133 * function. Otherwise, use the old default -- OFF. 2134 */ 2135 checksum = spa_feature_is_active(spa, 2136 SPA_FEATURE_MULTI_VDEV_CRASH_DUMP) ? ZIO_CHECKSUM_NOPARITY : 2137 ZIO_CHECKSUM_OFF; 2138 2139 /* 2140 * If we are resizing the dump device then we only need to 2141 * update the refreservation to match the newly updated 2142 * zvolsize. Otherwise, we save off the original state of the 2143 * zvol so that we can restore them if the zvol is ever undumpified. 2144 */ 2145 if (resize) { 2146 error = zap_update(os, ZVOL_ZAP_OBJ, 2147 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 2148 &zv->zv_volsize, tx); 2149 } else { 2150 uint64_t checksum, compress, refresrv, vbs, dedup; 2151 2152 error = dsl_prop_get_integer(zv->zv_name, 2153 zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); 2154 error = error ? error : dsl_prop_get_integer(zv->zv_name, 2155 zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); 2156 error = error ? error : dsl_prop_get_integer(zv->zv_name, 2157 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); 2158 error = error ? error : dsl_prop_get_integer(zv->zv_name, 2159 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL); 2160 if (version >= SPA_VERSION_DEDUP) { 2161 error = error ? error : 2162 dsl_prop_get_integer(zv->zv_name, 2163 zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL); 2164 } 2165 2166 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 2167 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, 2168 &compress, tx); 2169 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 2170 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); 2171 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 2172 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 2173 &refresrv, tx); 2174 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 2175 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, 2176 &vbs, tx); 2177 error = error ? error : dmu_object_set_blocksize( 2178 os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx); 2179 if (version >= SPA_VERSION_DEDUP) { 2180 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 2181 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, 2182 &dedup, tx); 2183 } 2184 if (error == 0) 2185 zv->zv_volblocksize = SPA_MAXBLOCKSIZE; 2186 } 2187 dmu_tx_commit(tx); 2188 2189 /* 2190 * We only need update the zvol's property if we are initializing 2191 * the dump area for the first time. 2192 */ 2193 if (!resize) { 2194 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2195 VERIFY(nvlist_add_uint64(nv, 2196 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); 2197 VERIFY(nvlist_add_uint64(nv, 2198 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 2199 ZIO_COMPRESS_OFF) == 0); 2200 VERIFY(nvlist_add_uint64(nv, 2201 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 2202 checksum) == 0); 2203 if (version >= SPA_VERSION_DEDUP) { 2204 VERIFY(nvlist_add_uint64(nv, 2205 zfs_prop_to_name(ZFS_PROP_DEDUP), 2206 ZIO_CHECKSUM_OFF) == 0); 2207 } 2208 2209 error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 2210 nv, NULL); 2211 nvlist_free(nv); 2212 2213 if (error) 2214 return (error); 2215 } 2216 2217 /* Allocate the space for the dump */ 2218 error = zvol_prealloc(zv); 2219 return (error); 2220} 2221 2222static int 2223zvol_dumpify(zvol_state_t *zv) 2224{ 2225 int error = 0; 2226 uint64_t dumpsize = 0; 2227 dmu_tx_t *tx; 2228 objset_t *os = zv->zv_objset; 2229 2230 if (zv->zv_flags & ZVOL_RDONLY) 2231 return (SET_ERROR(EROFS)); 2232 2233 if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 2234 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { 2235 boolean_t resize = (dumpsize > 0); 2236 2237 if ((error = zvol_dump_init(zv, resize)) != 0) { 2238 (void) zvol_dump_fini(zv); 2239 return (error); 2240 } 2241 } 2242 2243 /* 2244 * Build up our lba mapping. 2245 */ 2246 error = zvol_get_lbas(zv); 2247 if (error) { 2248 (void) zvol_dump_fini(zv); 2249 return (error); 2250 } 2251 2252 tx = dmu_tx_create(os); 2253 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 2254 error = dmu_tx_assign(tx, TXG_WAIT); 2255 if (error) { 2256 dmu_tx_abort(tx); 2257 (void) zvol_dump_fini(zv); 2258 return (error); 2259 } 2260 2261 zv->zv_flags |= ZVOL_DUMPIFIED; 2262 error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, 2263 &zv->zv_volsize, tx); 2264 dmu_tx_commit(tx); 2265 2266 if (error) { 2267 (void) zvol_dump_fini(zv); 2268 return (error); 2269 } 2270 2271 txg_wait_synced(dmu_objset_pool(os), 0); 2272 return (0); 2273} 2274 2275static int 2276zvol_dump_fini(zvol_state_t *zv) 2277{ 2278 dmu_tx_t *tx; 2279 objset_t *os = zv->zv_objset; 2280 nvlist_t *nv; 2281 int error = 0; 2282 uint64_t checksum, compress, refresrv, vbs, dedup; 2283 uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); 2284 2285 /* 2286 * Attempt to restore the zvol back to its pre-dumpified state. 2287 * This is a best-effort attempt as it's possible that not all 2288 * of these properties were initialized during the dumpify process 2289 * (i.e. error during zvol_dump_init). 2290 */ 2291 2292 tx = dmu_tx_create(os); 2293 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 2294 error = dmu_tx_assign(tx, TXG_WAIT); 2295 if (error) { 2296 dmu_tx_abort(tx); 2297 return (error); 2298 } 2299 (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); 2300 dmu_tx_commit(tx); 2301 2302 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2303 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); 2304 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2305 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); 2306 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2307 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); 2308 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2309 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs); 2310 2311 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2312 (void) nvlist_add_uint64(nv, 2313 zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); 2314 (void) nvlist_add_uint64(nv, 2315 zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); 2316 (void) nvlist_add_uint64(nv, 2317 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); 2318 if (version >= SPA_VERSION_DEDUP && 2319 zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2320 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) { 2321 (void) nvlist_add_uint64(nv, 2322 zfs_prop_to_name(ZFS_PROP_DEDUP), dedup); 2323 } 2324 (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 2325 nv, NULL); 2326 nvlist_free(nv); 2327 2328 zvol_free_extents(zv); 2329 zv->zv_flags &= ~ZVOL_DUMPIFIED; 2330 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); 2331 /* wait for dmu_free_long_range to actually free the blocks */ 2332 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 2333 tx = dmu_tx_create(os); 2334 dmu_tx_hold_bonus(tx, ZVOL_OBJ); 2335 error = dmu_tx_assign(tx, TXG_WAIT); 2336 if (error) { 2337 dmu_tx_abort(tx); 2338 return (error); 2339 } 2340 if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0) 2341 zv->zv_volblocksize = vbs; 2342 dmu_tx_commit(tx); 2343 2344 return (0); 2345} 2346#endif /* sun */ 2347 2348static void 2349zvol_geom_run(zvol_state_t *zv) 2350{ 2351 struct g_provider *pp; 2352 2353 pp = zv->zv_provider; 2354 g_error_provider(pp, 0); 2355 2356 kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0, 2357 "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER)); 2358} 2359 2360static void 2361zvol_geom_destroy(zvol_state_t *zv) 2362{ 2363 struct g_provider *pp; 2364 2365 g_topology_assert(); 2366 2367 mtx_lock(&zv->zv_queue_mtx); 2368 zv->zv_state = 1; 2369 wakeup_one(&zv->zv_queue); 2370 while (zv->zv_state != 2) 2371 msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0); 2372 mtx_destroy(&zv->zv_queue_mtx); 2373 2374 pp = zv->zv_provider; 2375 zv->zv_provider = NULL; 2376 pp->private = NULL; 2377 g_wither_geom(pp->geom, ENXIO); 2378} 2379 2380static int 2381zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace) 2382{ 2383 int count, error, flags; 2384 2385 g_topology_assert(); 2386 2387 /* 2388 * To make it easier we expect either open or close, but not both 2389 * at the same time. 2390 */ 2391 KASSERT((acr >= 0 && acw >= 0 && ace >= 0) || 2392 (acr <= 0 && acw <= 0 && ace <= 0), 2393 ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).", 2394 pp->name, acr, acw, ace)); 2395 2396 if (pp->private == NULL) { 2397 if (acr <= 0 && acw <= 0 && ace <= 0) 2398 return (0); 2399 return (pp->error); 2400 } 2401 2402 /* 2403 * We don't pass FEXCL flag to zvol_open()/zvol_close() if ace != 0, 2404 * because GEOM already handles that and handles it a bit differently. 2405 * GEOM allows for multiple read/exclusive consumers and ZFS allows 2406 * only one exclusive consumer, no matter if it is reader or writer. 2407 * I like better the way GEOM works so I'll leave it for GEOM to 2408 * decide what to do. 2409 */ 2410 2411 count = acr + acw + ace; 2412 if (count == 0) 2413 return (0); 2414 2415 flags = 0; 2416 if (acr != 0 || ace != 0) 2417 flags |= FREAD; 2418 if (acw != 0) 2419 flags |= FWRITE; 2420 2421 g_topology_unlock(); 2422 if (count > 0) 2423 error = zvol_open(pp, flags, count); 2424 else 2425 error = zvol_close(pp, flags, -count); 2426 g_topology_lock(); 2427 return (error); 2428} 2429 2430static void 2431zvol_geom_start(struct bio *bp) 2432{ 2433 zvol_state_t *zv; 2434 boolean_t first; 2435 2436 zv = bp->bio_to->private; 2437 ASSERT(zv != NULL); 2438 switch (bp->bio_cmd) { 2439 case BIO_FLUSH: 2440 if (!THREAD_CAN_SLEEP()) 2441 goto enqueue; 2442 zil_commit(zv->zv_zilog, ZVOL_OBJ); 2443 g_io_deliver(bp, 0); 2444 break; 2445 case BIO_READ: 2446 case BIO_WRITE: 2447 case BIO_DELETE: 2448 if (!THREAD_CAN_SLEEP()) 2449 goto enqueue; 2450 zvol_strategy(bp); 2451 break; 2452 case BIO_GETATTR: 2453 if (g_handleattr_int(bp, "GEOM::candelete", 1)) 2454 return; 2455 /* FALLTHROUGH */ 2456 default: 2457 g_io_deliver(bp, EOPNOTSUPP); 2458 break; 2459 } 2460 return; 2461 2462enqueue: 2463 mtx_lock(&zv->zv_queue_mtx); 2464 first = (bioq_first(&zv->zv_queue) == NULL); 2465 bioq_insert_tail(&zv->zv_queue, bp); 2466 mtx_unlock(&zv->zv_queue_mtx); 2467 if (first) 2468 wakeup_one(&zv->zv_queue); 2469} 2470 2471static void 2472zvol_geom_worker(void *arg) 2473{ 2474 zvol_state_t *zv; 2475 struct bio *bp; 2476 2477 thread_lock(curthread); 2478 sched_prio(curthread, PRIBIO); 2479 thread_unlock(curthread); 2480 2481 zv = arg; 2482 for (;;) { 2483 mtx_lock(&zv->zv_queue_mtx); 2484 bp = bioq_takefirst(&zv->zv_queue); 2485 if (bp == NULL) { 2486 if (zv->zv_state == 1) { 2487 zv->zv_state = 2; 2488 wakeup(&zv->zv_state); 2489 mtx_unlock(&zv->zv_queue_mtx); 2490 kthread_exit(); 2491 } 2492 msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP, 2493 "zvol:io", 0); 2494 continue; 2495 } 2496 mtx_unlock(&zv->zv_queue_mtx); 2497 switch (bp->bio_cmd) { 2498 case BIO_FLUSH: 2499 zil_commit(zv->zv_zilog, ZVOL_OBJ); 2500 g_io_deliver(bp, 0); 2501 break; 2502 case BIO_READ: 2503 case BIO_WRITE: 2504 zvol_strategy(bp); 2505 break; 2506 } 2507 } 2508} 2509 2510extern boolean_t dataset_name_hidden(const char *name); 2511 2512static int 2513zvol_create_snapshots(objset_t *os, const char *name) 2514{ 2515 uint64_t cookie, obj; 2516 char *sname; 2517 int error, len; 2518 2519 cookie = obj = 0; 2520 sname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2521 2522#if 0 2523 (void) dmu_objset_find(name, dmu_objset_prefetch, NULL, 2524 DS_FIND_SNAPSHOTS); 2525#endif 2526 2527 for (;;) { 2528 len = snprintf(sname, MAXPATHLEN, "%s@", name); 2529 if (len >= MAXPATHLEN) { 2530 dmu_objset_rele(os, FTAG); 2531 error = ENAMETOOLONG; 2532 break; 2533 } 2534 2535 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 2536 error = dmu_snapshot_list_next(os, MAXPATHLEN - len, 2537 sname + len, &obj, &cookie, NULL); 2538 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 2539 if (error != 0) { 2540 if (error == ENOENT) 2541 error = 0; 2542 break; 2543 } 2544 2545 if ((error = zvol_create_minor(sname)) != 0) { 2546 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 2547 sname, error); 2548 break; 2549 } 2550 } 2551 2552 kmem_free(sname, MAXPATHLEN); 2553 return (error); 2554} 2555 2556int 2557zvol_create_minors(const char *name) 2558{ 2559 uint64_t cookie; 2560 objset_t *os; 2561 char *osname, *p; 2562 int error, len; 2563 2564 if (dataset_name_hidden(name)) 2565 return (0); 2566 2567 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 2568 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 2569 name, error); 2570 return (error); 2571 } 2572 if (dmu_objset_type(os) == DMU_OST_ZVOL) { 2573 dsl_dataset_long_hold(os->os_dsl_dataset, FTAG); 2574 dsl_pool_rele(dmu_objset_pool(os), FTAG); 2575 error = zvol_create_minor(name); 2576 if (error == 0 || error == EEXIST) { 2577 error = zvol_create_snapshots(os, name); 2578 } else { 2579 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 2580 name, error); 2581 } 2582 dsl_dataset_long_rele(os->os_dsl_dataset, FTAG); 2583 dsl_dataset_rele(os->os_dsl_dataset, FTAG); 2584 return (error); 2585 } 2586 if (dmu_objset_type(os) != DMU_OST_ZFS) { 2587 dmu_objset_rele(os, FTAG); 2588 return (0); 2589 } 2590 2591 osname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2592 if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) { 2593 dmu_objset_rele(os, FTAG); 2594 kmem_free(osname, MAXPATHLEN); 2595 return (ENOENT); 2596 } 2597 p = osname + strlen(osname); 2598 len = MAXPATHLEN - (p - osname); 2599 2600#if 0 2601 /* Prefetch the datasets. */ 2602 cookie = 0; 2603 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { 2604 if (!dataset_name_hidden(osname)) 2605 (void) dmu_objset_prefetch(osname, NULL); 2606 } 2607#endif 2608 2609 cookie = 0; 2610 while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL, 2611 &cookie) == 0) { 2612 dmu_objset_rele(os, FTAG); 2613 (void)zvol_create_minors(osname); 2614 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 2615 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 2616 name, error); 2617 return (error); 2618 } 2619 } 2620 2621 dmu_objset_rele(os, FTAG); 2622 kmem_free(osname, MAXPATHLEN); 2623 return (0); 2624} 2625 2626static void 2627zvol_rename_minor(zvol_state_t *zv, const char *newname) 2628{ 2629 struct g_geom *gp; 2630 struct g_provider *pp; 2631 struct cdev *dev; 2632 2633 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 2634 2635 if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 2636 g_topology_lock(); 2637 pp = zv->zv_provider; 2638 ASSERT(pp != NULL); 2639 gp = pp->geom; 2640 ASSERT(gp != NULL); 2641 2642 zv->zv_provider = NULL; 2643 g_wither_provider(pp, ENXIO); 2644 2645 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname); 2646 pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 2647 pp->sectorsize = DEV_BSIZE; 2648 pp->mediasize = zv->zv_volsize; 2649 pp->private = zv; 2650 zv->zv_provider = pp; 2651 g_error_provider(pp, 0); 2652 g_topology_unlock(); 2653 } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 2654 dev = zv->zv_dev; 2655 ASSERT(dev != NULL); 2656 zv->zv_dev = NULL; 2657 destroy_dev(dev); 2658 2659 if (make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 2660 &dev, &zvol_cdevsw, NULL, UID_ROOT, GID_OPERATOR, 2661 0640, "%s/%s", ZVOL_DRIVER, newname) == 0) { 2662 zv->zv_dev = dev; 2663 dev->si_iosize_max = MAXPHYS; 2664 dev->si_drv2 = zv; 2665 } 2666 } 2667 strlcpy(zv->zv_name, newname, sizeof(zv->zv_name)); 2668} 2669 2670void 2671zvol_rename_minors(const char *oldname, const char *newname) 2672{ 2673 char name[MAXPATHLEN]; 2674 struct g_provider *pp; 2675 struct g_geom *gp; 2676 size_t oldnamelen, newnamelen; 2677 zvol_state_t *zv; 2678 char *namebuf; 2679 boolean_t locked = B_FALSE; 2680 2681 oldnamelen = strlen(oldname); 2682 newnamelen = strlen(newname); 2683 2684 DROP_GIANT(); 2685 /* See comment in zvol_open(). */ 2686 if (!MUTEX_HELD(&spa_namespace_lock)) { 2687 mutex_enter(&spa_namespace_lock); 2688 locked = B_TRUE; 2689 } 2690 2691 LIST_FOREACH(zv, &all_zvols, zv_links) { 2692 if (strcmp(zv->zv_name, oldname) == 0) { 2693 zvol_rename_minor(zv, newname); 2694 } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && 2695 (zv->zv_name[oldnamelen] == '/' || 2696 zv->zv_name[oldnamelen] == '@')) { 2697 snprintf(name, sizeof(name), "%s%c%s", newname, 2698 zv->zv_name[oldnamelen], 2699 zv->zv_name + oldnamelen + 1); 2700 zvol_rename_minor(zv, name); 2701 } 2702 } 2703 2704 if (locked) 2705 mutex_exit(&spa_namespace_lock); 2706 PICKUP_GIANT(); 2707} 2708 2709static int 2710zvol_d_open(struct cdev *dev, int flags, int fmt, struct thread *td) 2711{ 2712 zvol_state_t *zv; 2713 int err = 0; 2714 2715 mutex_enter(&spa_namespace_lock); 2716 zv = dev->si_drv2; 2717 if (zv == NULL) { 2718 mutex_exit(&spa_namespace_lock); 2719 return(ENXIO); /* zvol_create_minor() not done yet */ 2720 } 2721 2722 if (zv->zv_total_opens == 0) 2723 err = zvol_first_open(zv); 2724 if (err) { 2725 mutex_exit(&spa_namespace_lock); 2726 return (err); 2727 } 2728 if ((flags & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 2729 err = SET_ERROR(EROFS); 2730 goto out; 2731 } 2732 if (zv->zv_flags & ZVOL_EXCL) { 2733 err = SET_ERROR(EBUSY); 2734 goto out; 2735 } 2736#ifdef FEXCL 2737 if (flags & FEXCL) { 2738 if (zv->zv_total_opens != 0) { 2739 err = SET_ERROR(EBUSY); 2740 goto out; 2741 } 2742 zv->zv_flags |= ZVOL_EXCL; 2743 } 2744#endif 2745 2746 zv->zv_total_opens++; 2747 mutex_exit(&spa_namespace_lock); 2748 return (err); 2749out: 2750 if (zv->zv_total_opens == 0) 2751 zvol_last_close(zv); 2752 mutex_exit(&spa_namespace_lock); 2753 return (err); 2754} 2755 2756static int 2757zvol_d_close(struct cdev *dev, int flags, int fmt, struct thread *td) 2758{ 2759 zvol_state_t *zv; 2760 int err = 0; 2761 2762 mutex_enter(&spa_namespace_lock); 2763 zv = dev->si_drv2; 2764 if (zv == NULL) { 2765 mutex_exit(&spa_namespace_lock); 2766 return(ENXIO); 2767 } 2768 2769 if (zv->zv_flags & ZVOL_EXCL) { 2770 ASSERT(zv->zv_total_opens == 1); 2771 zv->zv_flags &= ~ZVOL_EXCL; 2772 } 2773 2774 /* 2775 * If the open count is zero, this is a spurious close. 2776 * That indicates a bug in the kernel / DDI framework. 2777 */ 2778 ASSERT(zv->zv_total_opens != 0); 2779 2780 /* 2781 * You may get multiple opens, but only one close. 2782 */ 2783 zv->zv_total_opens--; 2784 2785 if (zv->zv_total_opens == 0) 2786 zvol_last_close(zv); 2787 2788 mutex_exit(&spa_namespace_lock); 2789 return (0); 2790} 2791 2792static int 2793zvol_d_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) 2794{ 2795 zvol_state_t *zv; 2796 rl_t *rl; 2797 off_t offset, length, chunk; 2798 int i, error; 2799 u_int u; 2800 2801 zv = dev->si_drv2; 2802 2803 error = 0; 2804 KASSERT(zv->zv_total_opens > 0, 2805 ("Device with zero access count in zvol_d_ioctl")); 2806 2807 i = IOCPARM_LEN(cmd); 2808 switch (cmd) { 2809 case DIOCGSECTORSIZE: 2810 *(u_int *)data = DEV_BSIZE; 2811 break; 2812 case DIOCGMEDIASIZE: 2813 *(off_t *)data = zv->zv_volsize; 2814 break; 2815 case DIOCGFLUSH: 2816 zil_commit(zv->zv_zilog, ZVOL_OBJ); 2817 break; 2818 case DIOCGDELETE: 2819 offset = ((off_t *)data)[0]; 2820 length = ((off_t *)data)[1]; 2821 if ((offset % DEV_BSIZE) != 0 || (length % DEV_BSIZE) != 0 || 2822 offset < 0 || offset >= zv->zv_volsize || 2823 length <= 0) { 2824 printf("%s: offset=%jd length=%jd\n", __func__, offset, 2825 length); 2826 error = EINVAL; 2827 break; 2828 } 2829 2830 rl = zfs_range_lock(&zv->zv_znode, offset, length, RL_WRITER); 2831 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 2832 error = dmu_tx_assign(tx, TXG_WAIT); 2833 if (error != 0) { 2834 dmu_tx_abort(tx); 2835 } else { 2836 zvol_log_truncate(zv, tx, offset, length, B_TRUE); 2837 dmu_tx_commit(tx); 2838 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 2839 offset, length); 2840 } 2841 zfs_range_unlock(rl); 2842 if (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) 2843 zil_commit(zv->zv_zilog, ZVOL_OBJ); 2844 break; 2845 case DIOCGSTRIPESIZE: 2846 *(off_t *)data = zv->zv_volblocksize; 2847 break; 2848 case DIOCGSTRIPEOFFSET: 2849 *(off_t *)data = 0; 2850 break; 2851 default: 2852 error = ENOIOCTL; 2853 } 2854 2855 return (error); 2856} 2857