/*-
 * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
28256052Sgrehan
29256052Sgrehan#include <sys/cdefs.h>
30256052Sgrehan__FBSDID("$FreeBSD$");
31256052Sgrehan
32256052Sgrehan#include <sys/param.h>
33256052Sgrehan#include <sys/queue.h>
34256052Sgrehan#include <sys/errno.h>
35256052Sgrehan#include <sys/stat.h>
36256052Sgrehan#include <sys/ioctl.h>
37256052Sgrehan#include <sys/disk.h>
38256052Sgrehan
39256052Sgrehan#include <assert.h>
40256052Sgrehan#include <fcntl.h>
41256052Sgrehan#include <stdio.h>
42256052Sgrehan#include <stdlib.h>
43256052Sgrehan#include <string.h>
44256052Sgrehan#include <pthread.h>
45256052Sgrehan#include <pthread_np.h>
46256052Sgrehan#include <unistd.h>
47256052Sgrehan
48256052Sgrehan#include "bhyverun.h"
49256052Sgrehan#include "block_if.h"
50256052Sgrehan
/* Signature stored in bc_magic while a block interface context is valid */
#define BLOCKIF_SIG	0xb109b109

/* Size of the per-context request table; caps the number of queued requests */
#define BLOCKIF_MAXREQ	32
54256052Sgrehan
/* Operation attached to a queued request element */
enum blockop {
	BOP_READ,	/* vectored read from the backing descriptor */
	BOP_WRITE,	/* vectored write to the backing descriptor */
	BOP_FLUSH,	/* flush request */
	BOP_CANCEL	/* cancel request */
};
61256052Sgrehan
/* Records which queue a request element is currently linked on */
enum blockstat {
	BST_FREE,	/* on the free queue, available for reuse */
	BST_INUSE	/* on the in-use queue, holding a pending request */
};
66256052Sgrehan
67256052Sgrehanstruct blockif_elem {
68256052Sgrehan	TAILQ_ENTRY(blockif_elem) be_link;
69256052Sgrehan	struct blockif_req  *be_req;
70256052Sgrehan	enum blockop	     be_op;
71256052Sgrehan	enum blockstat	     be_status;
72256052Sgrehan};
73256052Sgrehan
74256052Sgrehanstruct blockif_ctxt {
75256052Sgrehan	int			bc_magic;
76256052Sgrehan	int			bc_fd;
77256052Sgrehan	int			bc_rdonly;
78256052Sgrehan	off_t			bc_size;
79256052Sgrehan	int			bc_sectsz;
80256052Sgrehan	pthread_t		bc_btid;
81256052Sgrehan        pthread_mutex_t		bc_mtx;
82256052Sgrehan        pthread_cond_t		bc_cond;
83256052Sgrehan	int			bc_closing;
84256052Sgrehan
85256052Sgrehan	/* Request elements and free/inuse queues */
86256052Sgrehan	TAILQ_HEAD(, blockif_elem) bc_freeq;
87256052Sgrehan	TAILQ_HEAD(, blockif_elem) bc_inuseq;
88256052Sgrehan	u_int			bc_req_count;
89256052Sgrehan	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
90256052Sgrehan};
91256052Sgrehan
92256052Sgrehanstatic int
93256052Sgrehanblockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
94256052Sgrehan		enum blockop op)
95256052Sgrehan{
96256052Sgrehan	struct blockif_elem *be;
97256052Sgrehan
98256052Sgrehan	assert(bc->bc_req_count < BLOCKIF_MAXREQ);
99256052Sgrehan
100256052Sgrehan	be = TAILQ_FIRST(&bc->bc_freeq);
101256052Sgrehan	assert(be != NULL);
102256052Sgrehan	assert(be->be_status == BST_FREE);
103256052Sgrehan
104256052Sgrehan	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
105256052Sgrehan	be->be_status = BST_INUSE;
106256052Sgrehan	be->be_req = breq;
107256052Sgrehan	be->be_op = op;
108256052Sgrehan	TAILQ_INSERT_TAIL(&bc->bc_inuseq, be, be_link);
109256052Sgrehan
110256052Sgrehan	bc->bc_req_count++;
111256052Sgrehan
112256052Sgrehan	return (0);
113256052Sgrehan}
114256052Sgrehan
115256052Sgrehanstatic int
116256052Sgrehanblockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem *el)
117256052Sgrehan{
118256052Sgrehan	struct blockif_elem *be;
119256052Sgrehan
120256052Sgrehan	if (bc->bc_req_count == 0)
121256052Sgrehan		return (ENOENT);
122256052Sgrehan
123256052Sgrehan	be = TAILQ_FIRST(&bc->bc_inuseq);
124256052Sgrehan	assert(be != NULL);
125256052Sgrehan	assert(be->be_status == BST_INUSE);
126256052Sgrehan	*el = *be;
127256052Sgrehan
128256052Sgrehan	TAILQ_REMOVE(&bc->bc_inuseq, be, be_link);
129256052Sgrehan	be->be_status = BST_FREE;
130256052Sgrehan	be->be_req = NULL;
131256052Sgrehan	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
132256052Sgrehan
133256052Sgrehan	bc->bc_req_count--;
134256052Sgrehan
135256052Sgrehan	return (0);
136256052Sgrehan}
137256052Sgrehan
138256052Sgrehanstatic void
139256052Sgrehanblockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
140256052Sgrehan{
141256052Sgrehan	struct blockif_req *br;
142256052Sgrehan	int err;
143256052Sgrehan
144256052Sgrehan	br = be->be_req;
145256052Sgrehan	err = 0;
146256052Sgrehan
147256052Sgrehan	switch (be->be_op) {
148256052Sgrehan	case BOP_READ:
149256052Sgrehan		if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
150256052Sgrehan			   br->br_offset) < 0)
151256052Sgrehan			err = errno;
152256052Sgrehan		break;
153256052Sgrehan	case BOP_WRITE:
154256052Sgrehan		if (bc->bc_rdonly)
155256052Sgrehan			err = EROFS;
156256052Sgrehan		else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
157256052Sgrehan			     br->br_offset) < 0)
158256052Sgrehan			err = errno;
159256052Sgrehan		break;
160256052Sgrehan	case BOP_FLUSH:
161256052Sgrehan		break;
162256052Sgrehan	case BOP_CANCEL:
163256052Sgrehan		err = EINTR;
164256052Sgrehan		break;
165256052Sgrehan	default:
166256052Sgrehan		err = EINVAL;
167256052Sgrehan		break;
168256052Sgrehan	}
169256052Sgrehan
170256052Sgrehan	(*br->br_callback)(br, err);
171256052Sgrehan}
172256052Sgrehan
173256052Sgrehanstatic void *
174256052Sgrehanblockif_thr(void *arg)
175256052Sgrehan{
176256052Sgrehan	struct blockif_ctxt *bc;
177256052Sgrehan	struct blockif_elem req;
178256052Sgrehan
179256052Sgrehan	bc = arg;
180256052Sgrehan
181256052Sgrehan	for (;;) {
182256052Sgrehan		pthread_mutex_lock(&bc->bc_mtx);
183256052Sgrehan		while (!blockif_dequeue(bc, &req)) {
184256052Sgrehan			pthread_mutex_unlock(&bc->bc_mtx);
185256052Sgrehan			blockif_proc(bc, &req);
186256052Sgrehan			pthread_mutex_lock(&bc->bc_mtx);
187256052Sgrehan		}
188256052Sgrehan		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
189256052Sgrehan		pthread_mutex_unlock(&bc->bc_mtx);
190256052Sgrehan
191256052Sgrehan		/*
192256052Sgrehan		 * Check ctxt status here to see if exit requested
193256052Sgrehan		 */
194256052Sgrehan		if (bc->bc_closing)
195256052Sgrehan			pthread_exit(NULL);
196256052Sgrehan	}
197256052Sgrehan
198256052Sgrehan	/* Not reached */
199256052Sgrehan	return (NULL);
200256052Sgrehan}
201256052Sgrehan
202256052Sgrehanstruct blockif_ctxt *
203256052Sgrehanblockif_open(const char *optstr, const char *ident)
204256052Sgrehan{
205256052Sgrehan	char tname[MAXCOMLEN + 1];
206256052Sgrehan	char *nopt, *xopts;
207256052Sgrehan	struct blockif_ctxt *bc;
208256052Sgrehan	struct stat sbuf;
209256052Sgrehan	off_t size;
210256052Sgrehan	int extra, fd, i, sectsz;
211256052Sgrehan	int nocache, sync, ro;
212256052Sgrehan
213256052Sgrehan	nocache = 0;
214256052Sgrehan	sync = 0;
215256052Sgrehan	ro = 0;
216256052Sgrehan
217256052Sgrehan	/*
218256052Sgrehan	 * The first element in the optstring is always a pathname.
219256052Sgrehan	 * Optional elements follow
220256052Sgrehan	 */
221256052Sgrehan	nopt = strdup(optstr);
222256052Sgrehan	for (xopts = strtok(nopt, ",");
223256052Sgrehan	     xopts != NULL;
224256052Sgrehan	     xopts = strtok(NULL, ",")) {
225256052Sgrehan		if (!strcmp(xopts, "nocache"))
226256052Sgrehan			nocache = 1;
227256052Sgrehan		else if (!strcmp(xopts, "sync"))
228256052Sgrehan			sync = 1;
229256052Sgrehan		else if (!strcmp(xopts, "ro"))
230256052Sgrehan			ro = 1;
231256052Sgrehan	}
232256052Sgrehan
233256052Sgrehan	extra = 0;
234256052Sgrehan	if (nocache)
235256052Sgrehan		extra |= O_DIRECT;
236256052Sgrehan	if (sync)
237256052Sgrehan		extra |= O_SYNC;
238256052Sgrehan
239256052Sgrehan	fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
240256052Sgrehan	if (fd < 0 && !ro) {
241256052Sgrehan		/* Attempt a r/w fail with a r/o open */
242256052Sgrehan		fd = open(nopt, O_RDONLY | extra);
243256052Sgrehan		ro = 1;
244256052Sgrehan	}
245256052Sgrehan
246256052Sgrehan	if (fd < 0) {
247256052Sgrehan		perror("Could not open backing file");
248256052Sgrehan		return (NULL);
249256052Sgrehan	}
250256052Sgrehan
251256052Sgrehan        if (fstat(fd, &sbuf) < 0) {
252256052Sgrehan                perror("Could not stat backing file");
253256052Sgrehan                close(fd);
254256052Sgrehan                return (NULL);
255256052Sgrehan        }
256256052Sgrehan
257256052Sgrehan        /*
258256052Sgrehan	 * Deal with raw devices
259256052Sgrehan	 */
260256052Sgrehan        size = sbuf.st_size;
261256052Sgrehan	sectsz = DEV_BSIZE;
262256052Sgrehan	if (S_ISCHR(sbuf.st_mode)) {
263256052Sgrehan		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
264256052Sgrehan		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
265256052Sgrehan			perror("Could not fetch dev blk/sector size");
266256052Sgrehan			close(fd);
267256052Sgrehan			return (NULL);
268256052Sgrehan		}
269256052Sgrehan		assert(size != 0);
270256052Sgrehan		assert(sectsz != 0);
271256052Sgrehan	}
272256052Sgrehan
273268953Sjhb	bc = calloc(1, sizeof(struct blockif_ctxt));
274256052Sgrehan	if (bc == NULL) {
275256052Sgrehan		close(fd);
276256052Sgrehan		return (NULL);
277256052Sgrehan	}
278256052Sgrehan
279256052Sgrehan	bc->bc_magic = BLOCKIF_SIG;
280256052Sgrehan	bc->bc_fd = fd;
281256052Sgrehan	bc->bc_size = size;
282256052Sgrehan	bc->bc_sectsz = sectsz;
283256052Sgrehan	pthread_mutex_init(&bc->bc_mtx, NULL);
284256052Sgrehan	pthread_cond_init(&bc->bc_cond, NULL);
285256052Sgrehan	TAILQ_INIT(&bc->bc_freeq);
286256052Sgrehan	TAILQ_INIT(&bc->bc_inuseq);
287256052Sgrehan	bc->bc_req_count = 0;
288256052Sgrehan	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
289256052Sgrehan		bc->bc_reqs[i].be_status = BST_FREE;
290256052Sgrehan		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
291256052Sgrehan	}
292256052Sgrehan
293256052Sgrehan	pthread_create(&bc->bc_btid, NULL, blockif_thr, bc);
294256052Sgrehan
295259301Sgrehan	snprintf(tname, sizeof(tname), "blk-%s", ident);
296256052Sgrehan	pthread_set_name_np(bc->bc_btid, tname);
297256052Sgrehan
298256052Sgrehan	return (bc);
299256052Sgrehan}
300256052Sgrehan
301256052Sgrehanstatic int
302256052Sgrehanblockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
303256052Sgrehan		enum blockop op)
304256052Sgrehan{
305256052Sgrehan	int err;
306256052Sgrehan
307256052Sgrehan	err = 0;
308256052Sgrehan
309256052Sgrehan	pthread_mutex_lock(&bc->bc_mtx);
310256052Sgrehan	if (bc->bc_req_count < BLOCKIF_MAXREQ) {
311256052Sgrehan		/*
312256052Sgrehan		 * Enqueue and inform the block i/o thread
313256052Sgrehan		 * that there is work available
314256052Sgrehan		 */
315256052Sgrehan		blockif_enqueue(bc, breq, op);
316256052Sgrehan		pthread_cond_signal(&bc->bc_cond);
317256052Sgrehan	} else {
318256052Sgrehan		/*
319256052Sgrehan		 * Callers are not allowed to enqueue more than
320256052Sgrehan		 * the specified blockif queue limit. Return an
321256052Sgrehan		 * error to indicate that the queue length has been
322256052Sgrehan		 * exceeded.
323256052Sgrehan		 */
324256052Sgrehan		err = E2BIG;
325256052Sgrehan	}
326256052Sgrehan	pthread_mutex_unlock(&bc->bc_mtx);
327256052Sgrehan
328256052Sgrehan	return (err);
329256052Sgrehan}
330256052Sgrehan
331256052Sgrehanint
332256052Sgrehanblockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
333256052Sgrehan{
334256052Sgrehan
335256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
336256052Sgrehan	return (blockif_request(bc, breq, BOP_READ));
337256052Sgrehan}
338256052Sgrehan
339256052Sgrehanint
340256052Sgrehanblockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
341256052Sgrehan{
342256052Sgrehan
343256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
344256052Sgrehan	return (blockif_request(bc, breq, BOP_WRITE));
345256052Sgrehan}
346256052Sgrehan
347256052Sgrehanint
348256052Sgrehanblockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
349256052Sgrehan{
350256052Sgrehan
351256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
352256052Sgrehan	return (blockif_request(bc, breq, BOP_FLUSH));
353256052Sgrehan}
354256052Sgrehan
355256052Sgrehanint
356256052Sgrehanblockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
357256052Sgrehan{
358256052Sgrehan
359256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
360256052Sgrehan	return (blockif_request(bc, breq, BOP_CANCEL));
361256052Sgrehan}
362256052Sgrehan
363256052Sgrehanint
364256052Sgrehanblockif_close(struct blockif_ctxt *bc)
365256052Sgrehan{
366256052Sgrehan	void *jval;
367256052Sgrehan	int err;
368256052Sgrehan
369256052Sgrehan	err = 0;
370256052Sgrehan
371256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
372256052Sgrehan
373256052Sgrehan	/*
374256052Sgrehan	 * Stop the block i/o thread
375256052Sgrehan	 */
376256052Sgrehan	bc->bc_closing = 1;
377256052Sgrehan	pthread_cond_signal(&bc->bc_cond);
378256052Sgrehan	pthread_join(bc->bc_btid, &jval);
379256052Sgrehan
380256052Sgrehan	/* XXX Cancel queued i/o's ??? */
381256052Sgrehan
382256052Sgrehan	/*
383256052Sgrehan	 * Release resources
384256052Sgrehan	 */
385256052Sgrehan	bc->bc_magic = 0;
386256052Sgrehan	close(bc->bc_fd);
387256052Sgrehan	free(bc);
388256052Sgrehan
389256052Sgrehan	return (0);
390256052Sgrehan}
391256052Sgrehan
392256052Sgrehan/*
393270159Sgrehan * Return virtual C/H/S values for a given block. Use the algorithm
394270159Sgrehan * outlined in the VHD specification to calculate values.
395270159Sgrehan */
396270159Sgrehanvoid
397270159Sgrehanblockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
398270159Sgrehan{
399270159Sgrehan	off_t sectors;		/* total sectors of the block dev */
400270159Sgrehan	off_t hcyl;		/* cylinders times heads */
401270159Sgrehan	uint16_t secpt;		/* sectors per track */
402270159Sgrehan	uint8_t heads;
403270159Sgrehan
404270159Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
405270159Sgrehan
406270159Sgrehan	sectors = bc->bc_size / bc->bc_sectsz;
407270159Sgrehan
408270159Sgrehan	/* Clamp the size to the largest possible with CHS */
409270159Sgrehan	if (sectors > 65535UL*16*255)
410270159Sgrehan		sectors = 65535UL*16*255;
411270159Sgrehan
412270159Sgrehan	if (sectors >= 65536UL*16*63) {
413270159Sgrehan		secpt = 255;
414270159Sgrehan		heads = 16;
415270159Sgrehan		hcyl = sectors / secpt;
416270159Sgrehan	} else {
417270159Sgrehan		secpt = 17;
418270159Sgrehan		hcyl = sectors / secpt;
419270159Sgrehan		heads = (hcyl + 1023) / 1024;
420270159Sgrehan
421270159Sgrehan		if (heads < 4)
422270159Sgrehan			heads = 4;
423270159Sgrehan
424270159Sgrehan		if (hcyl >= (heads * 1024) || heads > 16) {
425270159Sgrehan			secpt = 31;
426270159Sgrehan			heads = 16;
427270159Sgrehan			hcyl = sectors / secpt;
428270159Sgrehan		}
429270159Sgrehan		if (hcyl >= (heads * 1024)) {
430270159Sgrehan			secpt = 63;
431270159Sgrehan			heads = 16;
432270159Sgrehan			hcyl = sectors / secpt;
433270159Sgrehan		}
434270159Sgrehan	}
435270159Sgrehan
436270159Sgrehan	*c = hcyl / heads;
437270159Sgrehan	*h = heads;
438270159Sgrehan	*s = secpt;
439270159Sgrehan}
440270159Sgrehan
441270159Sgrehan/*
442256052Sgrehan * Accessors
443256052Sgrehan */
444256052Sgrehanoff_t
445256052Sgrehanblockif_size(struct blockif_ctxt *bc)
446256052Sgrehan{
447256052Sgrehan
448256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
449256052Sgrehan	return (bc->bc_size);
450256052Sgrehan}
451256052Sgrehan
452256052Sgrehanint
453256052Sgrehanblockif_sectsz(struct blockif_ctxt *bc)
454256052Sgrehan{
455256052Sgrehan
456256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
457256052Sgrehan	return (bc->bc_sectsz);
458256052Sgrehan}
459256052Sgrehan
460256052Sgrehanint
461256052Sgrehanblockif_queuesz(struct blockif_ctxt *bc)
462256052Sgrehan{
463256052Sgrehan
464256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
465256052Sgrehan	return (BLOCKIF_MAXREQ);
466256052Sgrehan}
467256052Sgrehan
468256052Sgrehanint
469256052Sgrehanblockif_is_ro(struct blockif_ctxt *bc)
470256052Sgrehan{
471256052Sgrehan
472256052Sgrehan	assert(bc->bc_magic == BLOCKIF_SIG);
473256052Sgrehan	return (bc->bc_rdonly);
474256052Sgrehan}
475