1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/param.h> 33221828Sgrehan#include <sys/linker_set.h> 34221828Sgrehan#include <sys/stat.h> 35221828Sgrehan#include <sys/uio.h> 36221828Sgrehan#include <sys/ioctl.h> 37244013Sgrehan#include <sys/disk.h> 38221828Sgrehan 39221828Sgrehan#include <errno.h> 40221828Sgrehan#include <fcntl.h> 41221828Sgrehan#include <stdio.h> 42221828Sgrehan#include <stdlib.h> 43221828Sgrehan#include <stdint.h> 44221828Sgrehan#include <string.h> 45221828Sgrehan#include <strings.h> 46221828Sgrehan#include <unistd.h> 47221828Sgrehan#include <assert.h> 48221828Sgrehan#include <pthread.h> 49256390Sgrehan#include <md5.h> 50221828Sgrehan 51244167Sgrehan#include "bhyverun.h" 52221828Sgrehan#include "pci_emul.h" 53221828Sgrehan#include "virtio.h" 54221828Sgrehan 55221828Sgrehan#define VTBLK_RINGSZ 64 56221828Sgrehan 57221828Sgrehan#define VTBLK_MAXSEGS 32 58221828Sgrehan 59221828Sgrehan#define VTBLK_S_OK 0 60221828Sgrehan#define VTBLK_S_IOERR 1 61256390Sgrehan#define VTBLK_S_UNSUPP 2 62221828Sgrehan 63256390Sgrehan#define VTBLK_BLK_ID_BYTES 20 64256390Sgrehan 65257128Sgrehan/* Capability bits */ 66257128Sgrehan#define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ 67257128Sgrehan#define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */ 68257128Sgrehan 69221828Sgrehan/* 70221828Sgrehan * Host capabilities 71221828Sgrehan */ 72221828Sgrehan#define VTBLK_S_HOSTCAPS \ 73257128Sgrehan ( VTBLK_F_SEG_MAX | \ 74257128Sgrehan VTBLK_F_BLK_SIZE | \ 75253440Sgrehan VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ 76221828Sgrehan 77221828Sgrehan/* 78253440Sgrehan * Config space "registers" 79221828Sgrehan */ 80221828Sgrehanstruct vtblk_config { 81221828Sgrehan uint64_t vbc_capacity; 82221828Sgrehan uint32_t vbc_size_max; 83221828Sgrehan uint32_t vbc_seg_max; 84221828Sgrehan uint16_t vbc_geom_c; 85221828Sgrehan uint8_t vbc_geom_h; 86221828Sgrehan uint8_t vbc_geom_s; 87221828Sgrehan uint32_t vbc_blk_size; 88221828Sgrehan uint32_t vbc_sectors_max; 89221828Sgrehan} __packed; 90221828Sgrehan 91221828Sgrehan/* 92221828Sgrehan * Fixed-size block header 93221828Sgrehan */ 94221828Sgrehanstruct virtio_blk_hdr { 95247342Sneel#define VBH_OP_READ 0 96247342Sneel#define VBH_OP_WRITE 1 97256390Sgrehan#define VBH_OP_IDENT 8 98247342Sneel#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ 99221828Sgrehan uint32_t vbh_type; 100221828Sgrehan uint32_t vbh_ioprio; 101221828Sgrehan uint64_t vbh_sector; 102221828Sgrehan} __packed; 103221828Sgrehan 104221828Sgrehan/* 105221828Sgrehan * Debug printf 106221828Sgrehan */ 107221828Sgrehanstatic int pci_vtblk_debug; 108221828Sgrehan#define DPRINTF(params) if (pci_vtblk_debug) printf params 109221828Sgrehan#define WPRINTF(params) printf params 110221828Sgrehan 111221828Sgrehan/* 112221828Sgrehan * Per-device softc 113221828Sgrehan */ 114221828Sgrehanstruct pci_vtblk_softc { 115253440Sgrehan struct virtio_softc vbsc_vs; 116267393Sjhb pthread_mutex_t vsc_mtx; 117253440Sgrehan struct vqueue_info vbsc_vq; 118221828Sgrehan int vbsc_fd; 119221828Sgrehan struct vtblk_config vbsc_cfg; 120256390Sgrehan char vbsc_ident[VTBLK_BLK_ID_BYTES]; 121221828Sgrehan}; 122221828Sgrehan 123253440Sgrehanstatic void pci_vtblk_reset(void *); 124253440Sgrehanstatic void pci_vtblk_notify(void *, struct vqueue_info *); 125253440Sgrehanstatic int pci_vtblk_cfgread(void *, int, int, uint32_t *); 126253440Sgrehanstatic int pci_vtblk_cfgwrite(void *, int, int, uint32_t); 127246214Sneel 128253440Sgrehanstatic struct virtio_consts vtblk_vi_consts = { 129253440Sgrehan "vtblk", /* our name */ 130253440Sgrehan 1, /* we support 1 virtqueue */ 131253440Sgrehan sizeof(struct vtblk_config), /* config reg size */ 132253440Sgrehan pci_vtblk_reset, /* reset */ 133253440Sgrehan pci_vtblk_notify, /* device-wide qnotify */ 134253440Sgrehan pci_vtblk_cfgread, /* read PCI config */ 135253440Sgrehan pci_vtblk_cfgwrite, /* write PCI config */ 136271685Sgrehan NULL, /* apply negotiated features */ 137253440Sgrehan VTBLK_S_HOSTCAPS, /* our capabilities */ 138253440Sgrehan}; 139246214Sneel 140221828Sgrehanstatic void 141253440Sgrehanpci_vtblk_reset(void *vsc) 142221828Sgrehan{ 143253440Sgrehan struct pci_vtblk_softc *sc = vsc; 144221828Sgrehan 145253440Sgrehan DPRINTF(("vtblk: device reset requested !\n")); 146253440Sgrehan vi_reset_dev(&sc->vbsc_vs); 147221828Sgrehan} 148221828Sgrehan 149221828Sgrehanstatic void 150253440Sgrehanpci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) 151221828Sgrehan{ 152221828Sgrehan struct virtio_blk_hdr *vbh; 153221828Sgrehan uint8_t *status; 154253440Sgrehan int i, n; 155221828Sgrehan int err; 156221828Sgrehan int iolen; 157253440Sgrehan int writeop, type; 158221828Sgrehan off_t offset; 159253440Sgrehan struct iovec iov[VTBLK_MAXSEGS + 2]; 160253440Sgrehan uint16_t flags[VTBLK_MAXSEGS + 2]; 161221828Sgrehan 162253440Sgrehan n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); 163221828Sgrehan 164253440Sgrehan /* 165253440Sgrehan * The first descriptor will be the read-only fixed header, 166253440Sgrehan * and the last is for status (hence +2 above and below). 167253440Sgrehan * The remaining iov's are the actual data I/O vectors. 168253440Sgrehan * 169253440Sgrehan * XXX - note - this fails on crash dump, which does a 170253440Sgrehan * VIRTIO_BLK_T_FLUSH with a zero transfer length 171253440Sgrehan */ 172255890Sgrehan assert(n >= 2 && n <= VTBLK_MAXSEGS + 2); 173221828Sgrehan 174253440Sgrehan assert((flags[0] & VRING_DESC_F_WRITE) == 0); 175253440Sgrehan assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); 176253440Sgrehan vbh = iov[0].iov_base; 177221828Sgrehan 178253440Sgrehan status = iov[--n].iov_base; 179253440Sgrehan assert(iov[n].iov_len == 1); 180253440Sgrehan assert(flags[n] & VRING_DESC_F_WRITE); 181221828Sgrehan 182221828Sgrehan /* 183247342Sneel * XXX 184247342Sneel * The guest should not be setting the BARRIER flag because 185247342Sneel * we don't advertise the capability. 186247342Sneel */ 187247342Sneel type = vbh->vbh_type & ~VBH_FLAG_BARRIER; 188247342Sneel writeop = (type == VBH_OP_WRITE); 189221828Sgrehan 190221828Sgrehan offset = vbh->vbh_sector * DEV_BSIZE; 191221828Sgrehan 192253440Sgrehan iolen = 0; 193253440Sgrehan for (i = 1; i < n; i++) { 194221828Sgrehan /* 195221828Sgrehan * - write op implies read-only descriptor, 196256390Sgrehan * - read/ident op implies write-only descriptor, 197221828Sgrehan * therefore test the inverse of the descriptor bit 198221828Sgrehan * to the op. 199221828Sgrehan */ 200253440Sgrehan assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); 201253440Sgrehan iolen += iov[i].iov_len; 202221828Sgrehan } 203221828Sgrehan 204221828Sgrehan DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 205256390Sgrehan writeop ? "write" : "read/ident", iolen, i - 1, offset)); 206221828Sgrehan 207256390Sgrehan switch (type) { 208256390Sgrehan case VBH_OP_WRITE: 209253440Sgrehan err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); 210256390Sgrehan break; 211256390Sgrehan case VBH_OP_READ: 212253440Sgrehan err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); 213256390Sgrehan break; 214256390Sgrehan case VBH_OP_IDENT: 215256390Sgrehan /* Assume a single buffer */ 216256390Sgrehan strlcpy(iov[1].iov_base, sc->vbsc_ident, 217268976Sjhb MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); 218256390Sgrehan err = 0; 219256390Sgrehan break; 220256390Sgrehan default: 221256390Sgrehan err = -ENOSYS; 222256390Sgrehan break; 223256390Sgrehan } 224221828Sgrehan 225256390Sgrehan /* convert errno into a virtio block error return */ 226256390Sgrehan if (err < 0) { 227256390Sgrehan if (err == -ENOSYS) 228256390Sgrehan *status = VTBLK_S_UNSUPP; 229256390Sgrehan else 230256390Sgrehan *status = VTBLK_S_IOERR; 231256390Sgrehan } else 232256390Sgrehan *status = VTBLK_S_OK; 233221828Sgrehan 234221828Sgrehan /* 235253440Sgrehan * Return the descriptor back to the host. 236253440Sgrehan * We wrote 1 byte (our status) to host. 237221828Sgrehan */ 238253440Sgrehan vq_relchain(vq, 1); 239221828Sgrehan} 240221828Sgrehan 241221828Sgrehanstatic void 242253440Sgrehanpci_vtblk_notify(void *vsc, struct vqueue_info *vq) 243249813Sneel{ 244253440Sgrehan struct pci_vtblk_softc *sc = vsc; 245249813Sneel 246253440Sgrehan vq_startchains(vq); 247253440Sgrehan while (vq_has_descs(vq)) 248253440Sgrehan pci_vtblk_proc(sc, vq); 249253440Sgrehan vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ 250249813Sneel} 251249813Sneel 252221828Sgrehanstatic int 253221828Sgrehanpci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 254221828Sgrehan{ 255221828Sgrehan struct stat sbuf; 256256390Sgrehan MD5_CTX mdctx; 257256390Sgrehan u_char digest[16]; 258221828Sgrehan struct pci_vtblk_softc *sc; 259244013Sgrehan off_t size; 260221828Sgrehan int fd; 261244013Sgrehan int sectsz; 262221828Sgrehan 263221828Sgrehan if (opts == NULL) { 264221828Sgrehan printf("virtio-block: backing device required\n"); 265221828Sgrehan return (1); 266221828Sgrehan } 267221828Sgrehan 268221828Sgrehan /* 269221828Sgrehan * The supplied backing file has to exist 270221828Sgrehan */ 271221828Sgrehan fd = open(opts, O_RDWR); 272221828Sgrehan if (fd < 0) { 273221828Sgrehan perror("Could not open backing file"); 274221828Sgrehan return (1); 275221828Sgrehan } 276221828Sgrehan 277221828Sgrehan if (fstat(fd, &sbuf) < 0) { 278221828Sgrehan perror("Could not stat backing file"); 279221828Sgrehan close(fd); 280221828Sgrehan return (1); 281221828Sgrehan } 282244013Sgrehan 283244013Sgrehan /* 284244013Sgrehan * Deal with raw devices 285244013Sgrehan */ 286244013Sgrehan size = sbuf.st_size; 287244013Sgrehan sectsz = DEV_BSIZE; 288244013Sgrehan if (S_ISCHR(sbuf.st_mode)) { 289244013Sgrehan if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 290244013Sgrehan ioctl(fd, DIOCGSECTORSIZE, §sz)) { 291244013Sgrehan perror("Could not fetch dev blk/sector size"); 292244013Sgrehan close(fd); 293244013Sgrehan return (1); 294244013Sgrehan } 295244013Sgrehan assert(size != 0); 296244013Sgrehan assert(sectsz != 0); 297244013Sgrehan } 298244013Sgrehan 299268953Sjhb sc = calloc(1, sizeof(struct pci_vtblk_softc)); 300221828Sgrehan 301253440Sgrehan /* record fd of storage device/file */ 302221828Sgrehan sc->vbsc_fd = fd; 303221828Sgrehan 304267393Sjhb pthread_mutex_init(&sc->vsc_mtx, NULL); 305267393Sjhb 306253440Sgrehan /* init virtio softc and virtqueues */ 307253440Sgrehan vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); 308267393Sjhb sc->vbsc_vs.vs_mtx = &sc->vsc_mtx; 309267393Sjhb 310253440Sgrehan sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; 311253440Sgrehan /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ 312253440Sgrehan 313256390Sgrehan /* 314256390Sgrehan * Create an identifier for the backing file. Use parts of the 315256390Sgrehan * md5 sum of the filename 316256390Sgrehan */ 317256390Sgrehan MD5Init(&mdctx); 318256390Sgrehan MD5Update(&mdctx, opts, strlen(opts)); 319256390Sgrehan MD5Final(digest, &mdctx); 320256390Sgrehan sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", 321256390Sgrehan digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); 322256390Sgrehan 323221828Sgrehan /* setup virtio block config space */ 324257128Sgrehan sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ 325221828Sgrehan sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; 326244013Sgrehan sc->vbsc_cfg.vbc_blk_size = sectsz; 327221828Sgrehan sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ 328221828Sgrehan sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */ 329221828Sgrehan sc->vbsc_cfg.vbc_geom_h = 0; 330221828Sgrehan sc->vbsc_cfg.vbc_geom_s = 0; 331221828Sgrehan sc->vbsc_cfg.vbc_sectors_max = 0; 332221828Sgrehan 333253440Sgrehan /* 334253440Sgrehan * Should we move some of this into virtio.c? Could 335253440Sgrehan * have the device, class, and subdev_0 as fields in 336253440Sgrehan * the virtio constants structure. 337253440Sgrehan */ 338221828Sgrehan pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); 339221828Sgrehan pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 340221828Sgrehan pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 341221828Sgrehan pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); 342246214Sneel 343267393Sjhb pci_lintr_request(pi); 344267393Sjhb 345256755Sgrehan if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) 346253440Sgrehan return (1); 347253440Sgrehan vi_set_io_bar(&sc->vbsc_vs, 0); 348221828Sgrehan return (0); 349221828Sgrehan} 350221828Sgrehan 351253440Sgrehanstatic int 352253440Sgrehanpci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) 353246214Sneel{ 354246214Sneel 355253440Sgrehan DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); 356253440Sgrehan return (1); 357246214Sneel} 358246214Sneel 359253440Sgrehanstatic int 360253440Sgrehanpci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) 361221828Sgrehan{ 362253440Sgrehan struct pci_vtblk_softc *sc = vsc; 363222830Sgrehan void *ptr; 364221828Sgrehan 365253440Sgrehan /* our caller has already verified offset and size */ 366253440Sgrehan ptr = (uint8_t *)&sc->vbsc_cfg + offset; 367253440Sgrehan memcpy(retval, ptr, size); 368253440Sgrehan return (0); 369221828Sgrehan} 370221828Sgrehan 371221828Sgrehanstruct pci_devemu pci_de_vblk = { 372241744Sgrehan .pe_emu = "virtio-blk", 373241744Sgrehan .pe_init = pci_vtblk_init, 374253440Sgrehan .pe_barwrite = vi_pci_write, 375253440Sgrehan .pe_barread = vi_pci_read 376221828Sgrehan}; 377221828SgrehanPCI_EMUL_SET(pci_de_vblk); 378