/* pci_virtio_block.c revision 256390 */
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_block.c 256390 2013-10-12 19:41:35Z grehan $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_block.c 256390 2013-10-12 19:41:35Z grehan $"); 31 32#include <sys/param.h> 33#include <sys/linker_set.h> 34#include <sys/stat.h> 35#include <sys/uio.h> 36#include <sys/ioctl.h> 37#include <sys/disk.h> 38 39#include <errno.h> 40#include <fcntl.h> 41#include <stdio.h> 42#include <stdlib.h> 43#include <stdint.h> 44#include <string.h> 45#include <strings.h> 46#include <unistd.h> 47#include <assert.h> 48#include <pthread.h> 49#include <md5.h> 50 51#include "bhyverun.h" 52#include "pci_emul.h" 53#include "virtio.h" 54 55#ifndef min 56#define min(a, b) ((a) < (b) ? (a) : (b)) 57#endif 58 59#define VTBLK_RINGSZ 64 60 61#define VTBLK_MAXSEGS 32 62 63#define VTBLK_S_OK 0 64#define VTBLK_S_IOERR 1 65#define VTBLK_S_UNSUPP 2 66 67#define VTBLK_BLK_ID_BYTES 20 68 69/* 70 * Host capabilities 71 */ 72#define VTBLK_S_HOSTCAPS \ 73 ( 0x00000004 | /* host maximum request segments */ \ 74 VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ 75 76/* 77 * Config space "registers" 78 */ 79struct vtblk_config { 80 uint64_t vbc_capacity; 81 uint32_t vbc_size_max; 82 uint32_t vbc_seg_max; 83 uint16_t vbc_geom_c; 84 uint8_t vbc_geom_h; 85 uint8_t vbc_geom_s; 86 uint32_t vbc_blk_size; 87 uint32_t vbc_sectors_max; 88} __packed; 89 90/* 91 * Fixed-size block header 92 */ 93struct virtio_blk_hdr { 94#define VBH_OP_READ 0 95#define VBH_OP_WRITE 1 96#define VBH_OP_IDENT 8 97#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ 98 uint32_t vbh_type; 99 uint32_t vbh_ioprio; 100 uint64_t vbh_sector; 101} __packed; 102 103/* 104 * Debug printf 105 */ 106static int pci_vtblk_debug; 107#define DPRINTF(params) if (pci_vtblk_debug) printf params 108#define WPRINTF(params) printf params 109 110/* 111 * Per-device softc 112 */ 113struct pci_vtblk_softc { 114 struct virtio_softc vbsc_vs; 115 struct 
vqueue_info vbsc_vq; 116 int vbsc_fd; 117 struct vtblk_config vbsc_cfg; 118 char vbsc_ident[VTBLK_BLK_ID_BYTES]; 119}; 120 121static void pci_vtblk_reset(void *); 122static void pci_vtblk_notify(void *, struct vqueue_info *); 123static int pci_vtblk_cfgread(void *, int, int, uint32_t *); 124static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); 125 126static struct virtio_consts vtblk_vi_consts = { 127 "vtblk", /* our name */ 128 1, /* we support 1 virtqueue */ 129 sizeof(struct vtblk_config), /* config reg size */ 130 pci_vtblk_reset, /* reset */ 131 pci_vtblk_notify, /* device-wide qnotify */ 132 pci_vtblk_cfgread, /* read PCI config */ 133 pci_vtblk_cfgwrite, /* write PCI config */ 134 VTBLK_S_HOSTCAPS, /* our capabilities */ 135}; 136 137static void 138pci_vtblk_reset(void *vsc) 139{ 140 struct pci_vtblk_softc *sc = vsc; 141 142 DPRINTF(("vtblk: device reset requested !\n")); 143 vi_reset_dev(&sc->vbsc_vs); 144} 145 146static void 147pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) 148{ 149 struct virtio_blk_hdr *vbh; 150 uint8_t *status; 151 int i, n; 152 int err; 153 int iolen; 154 int writeop, type; 155 off_t offset; 156 struct iovec iov[VTBLK_MAXSEGS + 2]; 157 uint16_t flags[VTBLK_MAXSEGS + 2]; 158 159 n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); 160 161 /* 162 * The first descriptor will be the read-only fixed header, 163 * and the last is for status (hence +2 above and below). 164 * The remaining iov's are the actual data I/O vectors. 
165 * 166 * XXX - note - this fails on crash dump, which does a 167 * VIRTIO_BLK_T_FLUSH with a zero transfer length 168 */ 169 assert(n >= 2 && n <= VTBLK_MAXSEGS + 2); 170 171 assert((flags[0] & VRING_DESC_F_WRITE) == 0); 172 assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); 173 vbh = iov[0].iov_base; 174 175 status = iov[--n].iov_base; 176 assert(iov[n].iov_len == 1); 177 assert(flags[n] & VRING_DESC_F_WRITE); 178 179 /* 180 * XXX 181 * The guest should not be setting the BARRIER flag because 182 * we don't advertise the capability. 183 */ 184 type = vbh->vbh_type & ~VBH_FLAG_BARRIER; 185 writeop = (type == VBH_OP_WRITE); 186 187 offset = vbh->vbh_sector * DEV_BSIZE; 188 189 iolen = 0; 190 for (i = 1; i < n; i++) { 191 /* 192 * - write op implies read-only descriptor, 193 * - read/ident op implies write-only descriptor, 194 * therefore test the inverse of the descriptor bit 195 * to the op. 196 */ 197 assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); 198 iolen += iov[i].iov_len; 199 } 200 201 DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 202 writeop ? "write" : "read/ident", iolen, i - 1, offset)); 203 204 switch (type) { 205 case VBH_OP_WRITE: 206 err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); 207 break; 208 case VBH_OP_READ: 209 err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); 210 break; 211 case VBH_OP_IDENT: 212 /* Assume a single buffer */ 213 strlcpy(iov[1].iov_base, sc->vbsc_ident, 214 min(iov[1].iov_len, sizeof(sc->vbsc_ident))); 215 err = 0; 216 break; 217 default: 218 err = -ENOSYS; 219 break; 220 } 221 222 /* convert errno into a virtio block error return */ 223 if (err < 0) { 224 if (err == -ENOSYS) 225 *status = VTBLK_S_UNSUPP; 226 else 227 *status = VTBLK_S_IOERR; 228 } else 229 *status = VTBLK_S_OK; 230 231 /* 232 * Return the descriptor back to the host. 233 * We wrote 1 byte (our status) to host. 
234 */ 235 vq_relchain(vq, 1); 236} 237 238static void 239pci_vtblk_notify(void *vsc, struct vqueue_info *vq) 240{ 241 struct pci_vtblk_softc *sc = vsc; 242 243 vq_startchains(vq); 244 while (vq_has_descs(vq)) 245 pci_vtblk_proc(sc, vq); 246 vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ 247} 248 249static int 250pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 251{ 252 struct stat sbuf; 253 MD5_CTX mdctx; 254 u_char digest[16]; 255 struct pci_vtblk_softc *sc; 256 off_t size; 257 int fd; 258 int sectsz; 259 int use_msix; 260 const char *env_msi; 261 262 if (opts == NULL) { 263 printf("virtio-block: backing device required\n"); 264 return (1); 265 } 266 267 /* 268 * The supplied backing file has to exist 269 */ 270 fd = open(opts, O_RDWR); 271 if (fd < 0) { 272 perror("Could not open backing file"); 273 return (1); 274 } 275 276 if (fstat(fd, &sbuf) < 0) { 277 perror("Could not stat backing file"); 278 close(fd); 279 return (1); 280 } 281 282 /* 283 * Deal with raw devices 284 */ 285 size = sbuf.st_size; 286 sectsz = DEV_BSIZE; 287 if (S_ISCHR(sbuf.st_mode)) { 288 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 289 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 290 perror("Could not fetch dev blk/sector size"); 291 close(fd); 292 return (1); 293 } 294 assert(size != 0); 295 assert(sectsz != 0); 296 } 297 298 sc = malloc(sizeof(struct pci_vtblk_softc)); 299 memset(sc, 0, sizeof(struct pci_vtblk_softc)); 300 301 /* record fd of storage device/file */ 302 sc->vbsc_fd = fd; 303 304 /* init virtio softc and virtqueues */ 305 vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); 306 sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; 307 /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ 308 309 /* 310 * Create an identifier for the backing file. 
Use parts of the 311 * md5 sum of the filename 312 */ 313 MD5Init(&mdctx); 314 MD5Update(&mdctx, opts, strlen(opts)); 315 MD5Final(digest, &mdctx); 316 sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", 317 digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); 318 319 /* setup virtio block config space */ 320 sc->vbsc_cfg.vbc_capacity = size / sectsz; 321 sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; 322 sc->vbsc_cfg.vbc_blk_size = sectsz; 323 sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ 324 sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */ 325 sc->vbsc_cfg.vbc_geom_h = 0; 326 sc->vbsc_cfg.vbc_geom_s = 0; 327 sc->vbsc_cfg.vbc_sectors_max = 0; 328 329 /* 330 * Should we move some of this into virtio.c? Could 331 * have the device, class, and subdev_0 as fields in 332 * the virtio constants structure. 333 */ 334 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); 335 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 336 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 337 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); 338 339 use_msix = 1; 340 if ((env_msi = getenv("BHYVE_USE_MSI"))) { 341 if (strcasecmp(env_msi, "yes") == 0) 342 use_msix = 0; 343 } 344 if (vi_intr_init(&sc->vbsc_vs, 1, use_msix)) 345 return (1); 346 vi_set_io_bar(&sc->vbsc_vs, 0); 347 return (0); 348} 349 350static int 351pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) 352{ 353 354 DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); 355 return (1); 356} 357 358static int 359pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) 360{ 361 struct pci_vtblk_softc *sc = vsc; 362 void *ptr; 363 364 /* our caller has already verified offset and size */ 365 ptr = (uint8_t *)&sc->vbsc_cfg + offset; 366 memcpy(retval, ptr, size); 367 return (0); 368} 369 370struct pci_devemu pci_de_vblk = { 371 .pe_emu = "virtio-blk", 372 .pe_init = pci_vtblk_init, 373 .pe_barwrite = vi_pci_write, 374 .pe_barread = vi_pci_read 375}; 
376PCI_EMUL_SET(pci_de_vblk); 377