1256052Sgrehan/*- 2256052Sgrehan * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 3256052Sgrehan * All rights reserved. 4256052Sgrehan * 5256052Sgrehan * Redistribution and use in source and binary forms, with or without 6256052Sgrehan * modification, are permitted provided that the following conditions 7256052Sgrehan * are met: 8256052Sgrehan * 1. Redistributions of source code must retain the above copyright 9256052Sgrehan * notice, this list of conditions and the following disclaimer. 10256052Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11256052Sgrehan * notice, this list of conditions and the following disclaimer in the 12256052Sgrehan * documentation and/or other materials provided with the distribution. 13256052Sgrehan * 14256052Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 15256052Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16256052Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17256052Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18256052Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19256052Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20256052Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21256052Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22256052Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23256052Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24256052Sgrehan * SUCH DAMAGE. 25256052Sgrehan * 26256052Sgrehan * $FreeBSD$ 27256052Sgrehan */ 28256052Sgrehan 29256052Sgrehan#include <sys/cdefs.h> 30256052Sgrehan__FBSDID("$FreeBSD$"); 31256052Sgrehan 32256052Sgrehan#include <sys/param.h> 33256052Sgrehan#include <sys/queue.h> 34256052Sgrehan#include <sys/errno.h> 35256052Sgrehan#include <sys/stat.h> 36256052Sgrehan#include <sys/ioctl.h> 37256052Sgrehan#include <sys/disk.h> 38256052Sgrehan 39256052Sgrehan#include <assert.h> 40256052Sgrehan#include <fcntl.h> 41256052Sgrehan#include <stdio.h> 42256052Sgrehan#include <stdlib.h> 43256052Sgrehan#include <string.h> 44256052Sgrehan#include <pthread.h> 45256052Sgrehan#include <pthread_np.h> 46256052Sgrehan#include <unistd.h> 47256052Sgrehan 48256052Sgrehan#include "bhyverun.h" 49256052Sgrehan#include "block_if.h" 50256052Sgrehan 51256052Sgrehan#define BLOCKIF_SIG 0xb109b109 52256052Sgrehan 53267071Sjhb#define BLOCKIF_MAXREQ 32 54256052Sgrehan 55256052Sgrehanenum blockop { 56256052Sgrehan BOP_READ, 57256052Sgrehan BOP_WRITE, 58256052Sgrehan BOP_FLUSH, 59256052Sgrehan BOP_CANCEL 60256052Sgrehan}; 61256052Sgrehan 62256052Sgrehanenum blockstat { 63256052Sgrehan BST_FREE, 64256052Sgrehan BST_INUSE 65256052Sgrehan}; 66256052Sgrehan 67256052Sgrehanstruct blockif_elem { 68256052Sgrehan TAILQ_ENTRY(blockif_elem) be_link; 69256052Sgrehan struct blockif_req *be_req; 70256052Sgrehan enum blockop be_op; 71256052Sgrehan enum blockstat be_status; 72256052Sgrehan}; 73256052Sgrehan 74256052Sgrehanstruct blockif_ctxt { 75256052Sgrehan int bc_magic; 76256052Sgrehan int bc_fd; 77256052Sgrehan int bc_rdonly; 78256052Sgrehan off_t bc_size; 79256052Sgrehan int bc_sectsz; 80256052Sgrehan pthread_t bc_btid; 81256052Sgrehan pthread_mutex_t bc_mtx; 82256052Sgrehan pthread_cond_t bc_cond; 83256052Sgrehan int bc_closing; 84256052Sgrehan 85256052Sgrehan /* Request elements and free/inuse queues */ 86256052Sgrehan TAILQ_HEAD(, blockif_elem) bc_freeq; 87256052Sgrehan TAILQ_HEAD(, blockif_elem) bc_inuseq; 88256052Sgrehan u_int bc_req_count; 89256052Sgrehan struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 90256052Sgrehan}; 91256052Sgrehan 92256052Sgrehanstatic int 93256052Sgrehanblockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 94256052Sgrehan enum blockop op) 95256052Sgrehan{ 96256052Sgrehan struct blockif_elem *be; 97256052Sgrehan 98256052Sgrehan assert(bc->bc_req_count < BLOCKIF_MAXREQ); 99256052Sgrehan 100256052Sgrehan be = TAILQ_FIRST(&bc->bc_freeq); 101256052Sgrehan assert(be != NULL); 102256052Sgrehan assert(be->be_status == BST_FREE); 103256052Sgrehan 104256052Sgrehan TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 105256052Sgrehan be->be_status = BST_INUSE; 106256052Sgrehan be->be_req = breq; 107256052Sgrehan be->be_op = op; 108256052Sgrehan TAILQ_INSERT_TAIL(&bc->bc_inuseq, be, be_link); 109256052Sgrehan 110256052Sgrehan bc->bc_req_count++; 111256052Sgrehan 112256052Sgrehan return (0); 113256052Sgrehan} 114256052Sgrehan 115256052Sgrehanstatic int 116256052Sgrehanblockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem *el) 117256052Sgrehan{ 118256052Sgrehan struct blockif_elem *be; 119256052Sgrehan 120256052Sgrehan if (bc->bc_req_count == 0) 121256052Sgrehan return (ENOENT); 122256052Sgrehan 123256052Sgrehan be = TAILQ_FIRST(&bc->bc_inuseq); 124256052Sgrehan assert(be != NULL); 125256052Sgrehan assert(be->be_status == BST_INUSE); 126256052Sgrehan *el = *be; 127256052Sgrehan 128256052Sgrehan TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); 129256052Sgrehan be->be_status = BST_FREE; 130256052Sgrehan be->be_req = NULL; 131256052Sgrehan TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 132256052Sgrehan 133256052Sgrehan bc->bc_req_count--; 134256052Sgrehan 135256052Sgrehan return (0); 136256052Sgrehan} 137256052Sgrehan 138256052Sgrehanstatic void 139256052Sgrehanblockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) 140256052Sgrehan{ 141256052Sgrehan struct blockif_req *br; 142256052Sgrehan int err; 143256052Sgrehan 144256052Sgrehan br = be->be_req; 145256052Sgrehan err = 0; 146256052Sgrehan 147256052Sgrehan switch (be->be_op) { 148256052Sgrehan case BOP_READ: 149256052Sgrehan if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 150256052Sgrehan br->br_offset) < 0) 151256052Sgrehan err = errno; 152256052Sgrehan break; 153256052Sgrehan case BOP_WRITE: 154256052Sgrehan if (bc->bc_rdonly) 155256052Sgrehan err = EROFS; 156256052Sgrehan else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 157256052Sgrehan br->br_offset) < 0) 158256052Sgrehan err = errno; 159256052Sgrehan break; 160256052Sgrehan case BOP_FLUSH: 161256052Sgrehan break; 162256052Sgrehan case BOP_CANCEL: 163256052Sgrehan err = EINTR; 164256052Sgrehan break; 165256052Sgrehan default: 166256052Sgrehan err = EINVAL; 167256052Sgrehan break; 168256052Sgrehan } 169256052Sgrehan 170256052Sgrehan (*br->br_callback)(br, err); 171256052Sgrehan} 172256052Sgrehan 173256052Sgrehanstatic void * 174256052Sgrehanblockif_thr(void *arg) 175256052Sgrehan{ 176256052Sgrehan struct blockif_ctxt *bc; 177256052Sgrehan struct blockif_elem req; 178256052Sgrehan 179256052Sgrehan bc = arg; 180256052Sgrehan 181256052Sgrehan for (;;) { 182256052Sgrehan pthread_mutex_lock(&bc->bc_mtx); 183256052Sgrehan while (!blockif_dequeue(bc, &req)) { 184256052Sgrehan pthread_mutex_unlock(&bc->bc_mtx); 185256052Sgrehan blockif_proc(bc, &req); 186256052Sgrehan pthread_mutex_lock(&bc->bc_mtx); 187256052Sgrehan } 188256052Sgrehan pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 189256052Sgrehan pthread_mutex_unlock(&bc->bc_mtx); 190256052Sgrehan 191256052Sgrehan /* 192256052Sgrehan * Check ctxt status here to see if exit requested 193256052Sgrehan */ 194256052Sgrehan if (bc->bc_closing) 195256052Sgrehan pthread_exit(NULL); 196256052Sgrehan } 197256052Sgrehan 198256052Sgrehan /* Not reached */ 199256052Sgrehan return (NULL); 200256052Sgrehan} 201256052Sgrehan 202256052Sgrehanstruct blockif_ctxt * 203256052Sgrehanblockif_open(const char *optstr, const char *ident) 204256052Sgrehan{ 205256052Sgrehan char tname[MAXCOMLEN + 1]; 206256052Sgrehan char *nopt, *xopts; 207256052Sgrehan struct blockif_ctxt *bc; 208256052Sgrehan struct stat sbuf; 209256052Sgrehan off_t size; 210256052Sgrehan int extra, fd, i, sectsz; 211256052Sgrehan int nocache, sync, ro; 212256052Sgrehan 213256052Sgrehan nocache = 0; 214256052Sgrehan sync = 0; 215256052Sgrehan ro = 0; 216256052Sgrehan 217256052Sgrehan /* 218256052Sgrehan * The first element in the optstring is always a pathname. 219256052Sgrehan * Optional elements follow 220256052Sgrehan */ 221256052Sgrehan nopt = strdup(optstr); 222256052Sgrehan for (xopts = strtok(nopt, ","); 223256052Sgrehan xopts != NULL; 224256052Sgrehan xopts = strtok(NULL, ",")) { 225256052Sgrehan if (!strcmp(xopts, "nocache")) 226256052Sgrehan nocache = 1; 227256052Sgrehan else if (!strcmp(xopts, "sync")) 228256052Sgrehan sync = 1; 229256052Sgrehan else if (!strcmp(xopts, "ro")) 230256052Sgrehan ro = 1; 231256052Sgrehan } 232256052Sgrehan 233256052Sgrehan extra = 0; 234256052Sgrehan if (nocache) 235256052Sgrehan extra |= O_DIRECT; 236256052Sgrehan if (sync) 237256052Sgrehan extra |= O_SYNC; 238256052Sgrehan 239256052Sgrehan fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 240256052Sgrehan if (fd < 0 && !ro) { 241256052Sgrehan /* Attempt a r/w fail with a r/o open */ 242256052Sgrehan fd = open(nopt, O_RDONLY | extra); 243256052Sgrehan ro = 1; 244256052Sgrehan } 245256052Sgrehan 246256052Sgrehan if (fd < 0) { 247256052Sgrehan perror("Could not open backing file"); 248256052Sgrehan return (NULL); 249256052Sgrehan } 250256052Sgrehan 251256052Sgrehan if (fstat(fd, &sbuf) < 0) { 252256052Sgrehan perror("Could not stat backing file"); 253256052Sgrehan close(fd); 254256052Sgrehan return (NULL); 255256052Sgrehan } 256256052Sgrehan 257256052Sgrehan /* 258256052Sgrehan * Deal with raw devices 259256052Sgrehan */ 260256052Sgrehan size = sbuf.st_size; 261256052Sgrehan sectsz = DEV_BSIZE; 262256052Sgrehan if (S_ISCHR(sbuf.st_mode)) { 263256052Sgrehan if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 264256052Sgrehan ioctl(fd, DIOCGSECTORSIZE, §sz)) { 265256052Sgrehan perror("Could not fetch dev blk/sector size"); 266256052Sgrehan close(fd); 267256052Sgrehan return (NULL); 268256052Sgrehan } 269256052Sgrehan assert(size != 0); 270256052Sgrehan assert(sectsz != 0); 271256052Sgrehan } 272256052Sgrehan 273268953Sjhb bc = calloc(1, sizeof(struct blockif_ctxt)); 274256052Sgrehan if (bc == NULL) { 275256052Sgrehan close(fd); 276256052Sgrehan return (NULL); 277256052Sgrehan } 278256052Sgrehan 279256052Sgrehan bc->bc_magic = BLOCKIF_SIG; 280256052Sgrehan bc->bc_fd = fd; 281256052Sgrehan bc->bc_size = size; 282256052Sgrehan bc->bc_sectsz = sectsz; 283256052Sgrehan pthread_mutex_init(&bc->bc_mtx, NULL); 284256052Sgrehan pthread_cond_init(&bc->bc_cond, NULL); 285256052Sgrehan TAILQ_INIT(&bc->bc_freeq); 286256052Sgrehan TAILQ_INIT(&bc->bc_inuseq); 287256052Sgrehan bc->bc_req_count = 0; 288256052Sgrehan for (i = 0; i < BLOCKIF_MAXREQ; i++) { 289256052Sgrehan bc->bc_reqs[i].be_status = BST_FREE; 290256052Sgrehan TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 291256052Sgrehan } 292256052Sgrehan 293256052Sgrehan pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); 294256052Sgrehan 295259301Sgrehan snprintf(tname, sizeof(tname), "blk-%s", ident); 296256052Sgrehan pthread_set_name_np(bc->bc_btid, tname); 297256052Sgrehan 298256052Sgrehan return (bc); 299256052Sgrehan} 300256052Sgrehan 301256052Sgrehanstatic int 302256052Sgrehanblockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 303256052Sgrehan enum blockop op) 304256052Sgrehan{ 305256052Sgrehan int err; 306256052Sgrehan 307256052Sgrehan err = 0; 308256052Sgrehan 309256052Sgrehan pthread_mutex_lock(&bc->bc_mtx); 310256052Sgrehan if (bc->bc_req_count < BLOCKIF_MAXREQ) { 311256052Sgrehan /* 312256052Sgrehan * Enqueue and inform the block i/o thread 313256052Sgrehan * that there is work available 314256052Sgrehan */ 315256052Sgrehan blockif_enqueue(bc, breq, op); 316256052Sgrehan pthread_cond_signal(&bc->bc_cond); 317256052Sgrehan } else { 318256052Sgrehan /* 319256052Sgrehan * Callers are not allowed to enqueue more than 320256052Sgrehan * the specified blockif queue limit. Return an 321256052Sgrehan * error to indicate that the queue length has been 322256052Sgrehan * exceeded. 323256052Sgrehan */ 324256052Sgrehan err = E2BIG; 325256052Sgrehan } 326256052Sgrehan pthread_mutex_unlock(&bc->bc_mtx); 327256052Sgrehan 328256052Sgrehan return (err); 329256052Sgrehan} 330256052Sgrehan 331256052Sgrehanint 332256052Sgrehanblockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 333256052Sgrehan{ 334256052Sgrehan 335256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 336256052Sgrehan return (blockif_request(bc, breq, BOP_READ)); 337256052Sgrehan} 338256052Sgrehan 339256052Sgrehanint 340256052Sgrehanblockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 341256052Sgrehan{ 342256052Sgrehan 343256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 344256052Sgrehan return (blockif_request(bc, breq, BOP_WRITE)); 345256052Sgrehan} 346256052Sgrehan 347256052Sgrehanint 348256052Sgrehanblockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 349256052Sgrehan{ 350256052Sgrehan 351256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 352256052Sgrehan return (blockif_request(bc, breq, BOP_FLUSH)); 353256052Sgrehan} 354256052Sgrehan 355256052Sgrehanint 356256052Sgrehanblockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 357256052Sgrehan{ 358256052Sgrehan 359256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 360256052Sgrehan return (blockif_request(bc, breq, BOP_CANCEL)); 361256052Sgrehan} 362256052Sgrehan 363256052Sgrehanint 364256052Sgrehanblockif_close(struct blockif_ctxt *bc) 365256052Sgrehan{ 366256052Sgrehan void *jval; 367256052Sgrehan int err; 368256052Sgrehan 369256052Sgrehan err = 0; 370256052Sgrehan 371256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 372256052Sgrehan 373256052Sgrehan /* 374256052Sgrehan * Stop the block i/o thread 375256052Sgrehan */ 376256052Sgrehan bc->bc_closing = 1; 377256052Sgrehan pthread_cond_signal(&bc->bc_cond); 378256052Sgrehan pthread_join(bc->bc_btid, &jval); 379256052Sgrehan 380256052Sgrehan /* XXX Cancel queued i/o's ??? */ 381256052Sgrehan 382256052Sgrehan /* 383256052Sgrehan * Release resources 384256052Sgrehan */ 385256052Sgrehan bc->bc_magic = 0; 386256052Sgrehan close(bc->bc_fd); 387256052Sgrehan free(bc); 388256052Sgrehan 389256052Sgrehan return (0); 390256052Sgrehan} 391256052Sgrehan 392256052Sgrehan/* 393270159Sgrehan * Return virtual C/H/S values for a given block. Use the algorithm 394270159Sgrehan * outlined in the VHD specification to calculate values. 395270159Sgrehan */ 396270159Sgrehanvoid 397270159Sgrehanblockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 398270159Sgrehan{ 399270159Sgrehan off_t sectors; /* total sectors of the block dev */ 400270159Sgrehan off_t hcyl; /* cylinders times heads */ 401270159Sgrehan uint16_t secpt; /* sectors per track */ 402270159Sgrehan uint8_t heads; 403270159Sgrehan 404270159Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 405270159Sgrehan 406270159Sgrehan sectors = bc->bc_size / bc->bc_sectsz; 407270159Sgrehan 408270159Sgrehan /* Clamp the size to the largest possible with CHS */ 409270159Sgrehan if (sectors > 65535UL*16*255) 410270159Sgrehan sectors = 65535UL*16*255; 411270159Sgrehan 412270159Sgrehan if (sectors >= 65536UL*16*63) { 413270159Sgrehan secpt = 255; 414270159Sgrehan heads = 16; 415270159Sgrehan hcyl = sectors / secpt; 416270159Sgrehan } else { 417270159Sgrehan secpt = 17; 418270159Sgrehan hcyl = sectors / secpt; 419270159Sgrehan heads = (hcyl + 1023) / 1024; 420270159Sgrehan 421270159Sgrehan if (heads < 4) 422270159Sgrehan heads = 4; 423270159Sgrehan 424270159Sgrehan if (hcyl >= (heads * 1024) || heads > 16) { 425270159Sgrehan secpt = 31; 426270159Sgrehan heads = 16; 427270159Sgrehan hcyl = sectors / secpt; 428270159Sgrehan } 429270159Sgrehan if (hcyl >= (heads * 1024)) { 430270159Sgrehan secpt = 63; 431270159Sgrehan heads = 16; 432270159Sgrehan hcyl = sectors / secpt; 433270159Sgrehan } 434270159Sgrehan } 435270159Sgrehan 436270159Sgrehan *c = hcyl / heads; 437270159Sgrehan *h = heads; 438270159Sgrehan *s = secpt; 439270159Sgrehan} 440270159Sgrehan 441270159Sgrehan/* 442256052Sgrehan * Accessors 443256052Sgrehan */ 444256052Sgrehanoff_t 445256052Sgrehanblockif_size(struct blockif_ctxt *bc) 446256052Sgrehan{ 447256052Sgrehan 448256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 449256052Sgrehan return (bc->bc_size); 450256052Sgrehan} 451256052Sgrehan 452256052Sgrehanint 453256052Sgrehanblockif_sectsz(struct blockif_ctxt *bc) 454256052Sgrehan{ 455256052Sgrehan 456256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 457256052Sgrehan return (bc->bc_sectsz); 458256052Sgrehan} 459256052Sgrehan 460256052Sgrehanint 461256052Sgrehanblockif_queuesz(struct blockif_ctxt *bc) 462256052Sgrehan{ 463256052Sgrehan 464256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 465256052Sgrehan return (BLOCKIF_MAXREQ); 466256052Sgrehan} 467256052Sgrehan 468256052Sgrehanint 469256052Sgrehanblockif_is_ro(struct blockif_ctxt *bc) 470256052Sgrehan{ 471256052Sgrehan 472256052Sgrehan assert(bc->bc_magic == BLOCKIF_SIG); 473256052Sgrehan return (bc->bc_rdonly); 474256052Sgrehan} 475