1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 24 */ 25 26#include <sys/zfs_context.h> 27#include <sys/spa.h> 28#include <sys/vdev_file.h> 29#include <sys/vdev_impl.h> 30#include <sys/zio.h> 31#include <sys/fs/zfs.h> 32#include <sys/fm/fs/zfs.h> 33 34/* 35 * Virtual device vector for files. 36 */ 37 38static taskq_t *vdev_file_taskq; 39 40void 41vdev_file_init(void) 42{ 43 vdev_file_taskq = taskq_create("z_vdev_file", MAX(max_ncpus, 16), 44 minclsyspri, max_ncpus, INT_MAX, 0); 45} 46 47void 48vdev_file_fini(void) 49{ 50 taskq_destroy(vdev_file_taskq); 51} 52 53static void 54vdev_file_hold(vdev_t *vd) 55{ 56 ASSERT(vd->vdev_path != NULL); 57} 58 59static void 60vdev_file_rele(vdev_t *vd) 61{ 62 ASSERT(vd->vdev_path != NULL); 63} 64 65static int 66vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 67 uint64_t *logical_ashift, uint64_t *physical_ashift) 68{ 69 vdev_file_t *vf; 70 vnode_t *vp; 71 vattr_t vattr; 72 int error; 73 74 /* 75 * We must have a pathname, and it must be absolute. 76 */ 77 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 78 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 79 return (SET_ERROR(EINVAL)); 80 } 81 82 /* 83 * Reopen the device if it's not currently open. Otherwise, 84 * just update the physical size of the device. 85 */ 86 if (vd->vdev_tsd != NULL) { 87 ASSERT(vd->vdev_reopening); 88 vf = vd->vdev_tsd; 89 vp = vf->vf_vnode; 90 goto skip_open; 91 } 92 93 vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); 94 95 /* 96 * We always open the files from the root of the global zone, even if 97 * we're in a local zone. If the user has gotten to this point, the 98 * administrator has already decided that the pool should be available 99 * to local zone users, so the underlying devices should be as well. 100 */ 101 ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); 102 error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, 103 spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); 104 105 if (error) { 106 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 107 kmem_free(vd->vdev_tsd, sizeof (vdev_file_t)); 108 vd->vdev_tsd = NULL; 109 return (error); 110 } 111 112 vf->vf_vnode = vp; 113 114#ifdef _KERNEL 115 /* 116 * Make sure it's a regular file. 117 */ 118 if (vp->v_type != VREG) { 119#ifdef __FreeBSD__ 120 (void) VOP_CLOSE(vp, spa_mode(vd->vdev_spa), 1, 0, kcred, NULL); 121#endif 122 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 123#ifdef __FreeBSD__ 124 kmem_free(vd->vdev_tsd, sizeof (vdev_file_t)); 125 vd->vdev_tsd = NULL; 126#endif 127 return (SET_ERROR(ENODEV)); 128 } 129#endif /* _KERNEL */ 130 131skip_open: 132 /* 133 * Determine the physical size of the file. 134 */ 135 vattr.va_mask = AT_SIZE; 136 vn_lock(vp, LK_SHARED | LK_RETRY); 137 error = VOP_GETATTR(vp, &vattr, kcred); 138 VOP_UNLOCK(vp, 0); 139 if (error) { 140 (void) VOP_CLOSE(vp, spa_mode(vd->vdev_spa), 1, 0, kcred, NULL); 141 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 142 kmem_free(vd->vdev_tsd, sizeof (vdev_file_t)); 143 vd->vdev_tsd = NULL; 144 return (error); 145 } 146 147 vd->vdev_notrim = B_TRUE; 148 149 *max_psize = *psize = vattr.va_size; 150 *logical_ashift = SPA_MINBLOCKSHIFT; 151 *physical_ashift = SPA_MINBLOCKSHIFT; 152 153 return (0); 154} 155 156static void 157vdev_file_close(vdev_t *vd) 158{ 159 vdev_file_t *vf = vd->vdev_tsd; 160 161 if (vd->vdev_reopening || vf == NULL) 162 return; 163 164 if (vf->vf_vnode != NULL) { 165 (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, 166 kcred, NULL); 167 } 168 169 vd->vdev_delayed_close = B_FALSE; 170 kmem_free(vf, sizeof (vdev_file_t)); 171 vd->vdev_tsd = NULL; 172} 173 174/* 175 * Implements the interrupt side for file vdev types. This routine will be 176 * called when the I/O completes allowing us to transfer the I/O to the 177 * interrupt taskqs. For consistency, the code structure mimics disk vdev 178 * types. 179 */ 180static void 181vdev_file_io_intr(zio_t *zio) 182{ 183 zio_delay_interrupt(zio); 184} 185 186static void 187vdev_file_io_strategy(void *arg) 188{ 189 zio_t *zio = arg; 190 vdev_t *vd = zio->io_vd; 191 vdev_file_t *vf; 192 vnode_t *vp; 193 ssize_t resid; 194 195 vf = vd->vdev_tsd; 196 vp = vf->vf_vnode; 197 198 ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); 199 zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 200 UIO_READ : UIO_WRITE, vp, zio->io_data, zio->io_size, 201 zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 202 203 if (resid != 0 && zio->io_error == 0) 204 zio->io_error = ENOSPC; 205 206 vdev_file_io_intr(zio); 207} 208 209static void 210vdev_file_io_start(zio_t *zio) 211{ 212 vdev_t *vd = zio->io_vd; 213 vdev_file_t *vf = vd->vdev_tsd; 214 215 if (zio->io_type == ZIO_TYPE_IOCTL) { 216 /* XXPOLICY */ 217 if (!vdev_readable(vd)) { 218 zio->io_error = SET_ERROR(ENXIO); 219 zio_interrupt(zio); 220 return; 221 } 222 223 switch (zio->io_cmd) { 224 case DKIOCFLUSHWRITECACHE: 225 zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, 226 kcred, NULL); 227 break; 228 default: 229 zio->io_error = SET_ERROR(ENOTSUP); 230 } 231 232 zio_execute(zio); 233 return; 234 } 235 236 ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); 237 zio->io_target_timestamp = zio_handle_io_delay(zio); 238 239 VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio, 240 TQ_SLEEP), !=, 0); 241} 242 243/* ARGSUSED */ 244static void 245vdev_file_io_done(zio_t *zio) 246{ 247} 248 249vdev_ops_t vdev_file_ops = { 250 vdev_file_open, 251 vdev_file_close, 252 vdev_default_asize, 253 vdev_file_io_start, 254 vdev_file_io_done, 255 NULL, 256 vdev_file_hold, 257 vdev_file_rele, 258 VDEV_TYPE_FILE, /* name of this vdev type */ 259 B_TRUE /* leaf vdev */ 260}; 261 262/* 263 * From userland we access disks just like files. 264 */ 265#ifndef _KERNEL 266 267vdev_ops_t vdev_disk_ops = { 268 vdev_file_open, 269 vdev_file_close, 270 vdev_default_asize, 271 vdev_file_io_start, 272 vdev_file_io_done, 273 NULL, 274 vdev_file_hold, 275 vdev_file_rele, 276 VDEV_TYPE_DISK, /* name of this vdev type */ 277 B_TRUE /* leaf vdev */ 278}; 279 280#endif 281