pci_ahci.c revision 304420
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 304420 2016-08-18 11:45:02Z mav $
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 304420 2016-08-18 11:45:02Z mav $");
32
33#include <sys/param.h>
34#include <sys/linker_set.h>
35#include <sys/stat.h>
36#include <sys/uio.h>
37#include <sys/ioctl.h>
38#include <sys/disk.h>
39#include <sys/ata.h>
40#include <sys/endian.h>
41
42#include <errno.h>
43#include <fcntl.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <stdint.h>
47#include <string.h>
48#include <strings.h>
49#include <unistd.h>
50#include <assert.h>
51#include <pthread.h>
52#include <pthread_np.h>
53#include <inttypes.h>
54#include <md5.h>
55
56#include "bhyverun.h"
57#include "pci_emul.h"
58#include "ahci.h"
59#include "block_if.h"
60
61#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
62#define	MAX_PORTS	32	/* AHCI supports 32 ports */
63
64#define	PxSIG_ATA	0x00000101 /* ATA drive */
65#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
66
/*
 * SATA FIS type codes.  The FIS builders in this file store one of these
 * values in byte 0 of the FIS they construct.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
77
78/*
79 * SCSI opcodes
80 */
81#define	TEST_UNIT_READY		0x00
82#define	REQUEST_SENSE		0x03
83#define	INQUIRY			0x12
84#define	START_STOP_UNIT		0x1B
85#define	PREVENT_ALLOW		0x1E
86#define	READ_CAPACITY		0x25
87#define	READ_10			0x28
88#define	POSITION_TO_ELEMENT	0x2B
89#define	READ_TOC		0x43
90#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
91#define	MODE_SENSE_10		0x5A
92#define	REPORT_LUNS		0xA0
93#define	READ_12			0xA8
94#define	READ_CD			0xBE
95
96/*
97 * SCSI mode page codes
98 */
99#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
100#define	MODEPAGE_CD_CAPABILITIES	0x2A
101
102/*
103 * ATA commands
104 */
105#define	ATA_SF_ENAB_SATA_SF		0x10
106#define		ATA_SATA_SF_AN		0x05
107#define	ATA_SF_DIS_SATA_SF		0x90
108
/*
 * Debug printf
 *
 * When AHCI_DEBUG is defined, DPRINTF() formats to the 'dbg' stream and
 * flushes it after every message; otherwise it expands to nothing.
 * WPRINTF() always formats to stdout through printf().
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
119
/*
 * State for one block-backend request issued on behalf of a command slot.
 * A request lives on the port's free list (iofhd) while idle and on the
 * busy list (iobhd) while it is outstanding.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to the blockif_*() calls */
	struct ahci_port *io_pr;	/* presumably the owning port - TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;		/* command FIS of the command being served */
	uint32_t len;		/* total length of the command */
	uint32_t done;		/* portion of 'len' already issued */
	int slot;		/* command slot number */
	int more;		/* non-zero when another request must follow */
};
131
/*
 * Per-port state: the attached backend, emulated device state and the
 * port's register values.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;	/* command list, indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;		/* received-FIS area written by ahci_write_fis() */
	char ident[20 + 1];	/* string copied into identify words 10..19 */
	int atapi;		/* non-zero when the device is ATAPI */
	int reset;
	int waitforclear;	/* set when a FIS with ATA_S_ERROR is posted */
	int mult_sectors;	/* reported in identify word 59 */
	uint8_t xfermode;	/* transfer mode reported in identify data */
	uint8_t err_cfis[20];	/* details of the last failed command */
	uint8_t sense_key;	/* ATAPI sense key of the last error */
	uint8_t asc;		/* ATAPI additional sense code */
	u_int ccs;		/* zeroed by ahci_check_stopped() */
	uint32_t pending;	/* bitmask of slots with I/O in flight */

	/*
	 * Port register values.
	 * NOTE(review): names look like the AHCI PxCLB..PxFBS registers;
	 * confirm against the register read/write handlers.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the request array - TODO confirm */
	int ioqsz;			/* presumably its element count - TODO confirm */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* outstanding requests */
};
175
/*
 * One entry of a port's command list, located at
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for this command */
	uint32_t prdbc;		/* byte count transferred, set by write_prdt() */
	uint64_t ctba;		/* presumably the command table address - TODO confirm */
	uint32_t reserved[4];
};
183
/*
 * One PRDT entry.  The PRDT of a command starts 0x80 bytes after its
 * command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
190
/*
 * Controller-wide state: the PCI device instance, the lock, the
 * controller-level register values and the array of ports.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance */
	pthread_mutex_t	mtx;		/* must be held in ahci_port_stop() */
	int ports;			/* number of ports in use */
	uint32_t cap;
	uint32_t ghc;			/* holds AHCI_GHC_AE / AHCI_GHC_IE */
	uint32_t is;			/* one interrupt-pending bit per port */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the pin interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
210
211static void ahci_handle_port(struct ahci_port *p);
212
/*
 * Convert a logical block address to a minute/second/frame triple.
 *
 * The address is first biased by 150 blocks; the result is then split
 * using 75 frames per second and 60 seconds per minute.
 *
 * buf: receives minute, second and frame in buf[0], buf[1] and buf[2].
 * lba: logical block address to convert.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
220
221/*
222 * generate HBA intr depending on whether or not ports within
223 * the controller have an interrupt pending.
224 */
225static void
226ahci_generate_intr(struct pci_ahci_softc *sc)
227{
228	struct pci_devinst *pi;
229	int i;
230
231	pi = sc->asc_pi;
232
233	for (i = 0; i < sc->ports; i++) {
234		struct ahci_port *pr;
235		pr = &sc->port[i];
236		if (pr->is & pr->ie)
237			sc->is |= (1 << i);
238	}
239
240	DPRINTF("%s %x\n", __func__, sc->is);
241
242	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
243		if (pci_msi_enabled(pi)) {
244			/*
245			 * Generate an MSI interrupt on every edge
246			 */
247			pci_generate_msi(pi, 0);
248		} else if (!sc->lintr) {
249			/*
250			 * Only generate a pin-based interrupt if one wasn't
251			 * in progress
252			 */
253			sc->lintr = 1;
254			pci_lintr_assert(pi);
255		}
256	} else if (sc->lintr) {
257		/*
258		 * No interrupts: deassert pin-based signal if it had
259		 * been asserted
260		 */
261		pci_lintr_deassert(pi);
262		sc->lintr = 0;
263	}
264}
265
266static void
267ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
268{
269	int offset, len, irq;
270
271	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
272		return;
273
274	switch (ft) {
275	case FIS_TYPE_REGD2H:
276		offset = 0x40;
277		len = 20;
278		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
279		break;
280	case FIS_TYPE_SETDEVBITS:
281		offset = 0x58;
282		len = 8;
283		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
284		break;
285	case FIS_TYPE_PIOSETUP:
286		offset = 0x20;
287		len = 20;
288		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
289		break;
290	default:
291		WPRINTF("unsupported fis type %d\n", ft);
292		return;
293	}
294	if (fis[2] & ATA_S_ERROR) {
295		p->waitforclear = 1;
296		irq |= AHCI_P_IX_TFE;
297	}
298	memcpy(p->rfis + offset, fis, len);
299	if (irq) {
300		p->is |= irq;
301		ahci_generate_intr(p->pr_sc);
302	}
303}
304
305static void
306ahci_write_fis_piosetup(struct ahci_port *p)
307{
308	uint8_t fis[20];
309
310	memset(fis, 0, sizeof(fis));
311	fis[0] = FIS_TYPE_PIOSETUP;
312	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
313}
314
315static void
316ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
317{
318	uint8_t fis[8];
319	uint8_t error;
320
321	error = (tfd >> 8) & 0xff;
322	tfd &= 0x77;
323	memset(fis, 0, sizeof(fis));
324	fis[0] = FIS_TYPE_SETDEVBITS;
325	fis[1] = (1 << 6);
326	fis[2] = tfd;
327	fis[3] = error;
328	if (fis[2] & ATA_S_ERROR) {
329		p->err_cfis[0] = slot;
330		p->err_cfis[2] = tfd;
331		p->err_cfis[3] = error;
332		memcpy(&p->err_cfis[4], cfis + 4, 16);
333	} else {
334		*(uint32_t *)(fis + 4) = (1 << slot);
335		p->sact &= ~(1 << slot);
336	}
337	p->tfd &= ~0x77;
338	p->tfd |= tfd;
339	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
340}
341
342static void
343ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
344{
345	uint8_t fis[20];
346	uint8_t error;
347
348	error = (tfd >> 8) & 0xff;
349	memset(fis, 0, sizeof(fis));
350	fis[0] = FIS_TYPE_REGD2H;
351	fis[1] = (1 << 6);
352	fis[2] = tfd & 0xff;
353	fis[3] = error;
354	fis[4] = cfis[4];
355	fis[5] = cfis[5];
356	fis[6] = cfis[6];
357	fis[7] = cfis[7];
358	fis[8] = cfis[8];
359	fis[9] = cfis[9];
360	fis[10] = cfis[10];
361	fis[11] = cfis[11];
362	fis[12] = cfis[12];
363	fis[13] = cfis[13];
364	if (fis[2] & ATA_S_ERROR) {
365		p->err_cfis[0] = 0x80;
366		p->err_cfis[2] = tfd & 0xff;
367		p->err_cfis[3] = error;
368		memcpy(&p->err_cfis[4], cfis + 4, 16);
369	} else
370		p->ci &= ~(1 << slot);
371	p->tfd = tfd;
372	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
373}
374
375static void
376ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
377{
378	uint8_t fis[20];
379
380	p->tfd = ATA_S_READY | ATA_S_DSC;
381	memset(fis, 0, sizeof(fis));
382	fis[0] = FIS_TYPE_REGD2H;
383	fis[1] = 0;			/* No interrupt */
384	fis[2] = p->tfd;		/* Status */
385	fis[3] = 0;			/* No error */
386	p->ci &= ~(1 << slot);
387	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
388}
389
390static void
391ahci_write_reset_fis_d2h(struct ahci_port *p)
392{
393	uint8_t fis[20];
394
395	memset(fis, 0, sizeof(fis));
396	fis[0] = FIS_TYPE_REGD2H;
397	fis[3] = 1;
398	fis[4] = 1;
399	if (p->atapi) {
400		fis[5] = 0x14;
401		fis[6] = 0xeb;
402	}
403	fis[12] = 1;
404	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
405}
406
407static void
408ahci_check_stopped(struct ahci_port *p)
409{
410	/*
411	 * If we are no longer processing the command list and nothing
412	 * is in-flight, clear the running bit, the current command
413	 * slot, the command issue and active bits.
414	 */
415	if (!(p->cmd & AHCI_P_CMD_ST)) {
416		if (p->pending == 0) {
417			p->ccs = 0;
418			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
419			p->ci = 0;
420			p->sact = 0;
421			p->waitforclear = 0;
422		}
423	}
424}
425
426static void
427ahci_port_stop(struct ahci_port *p)
428{
429	struct ahci_ioreq *aior;
430	uint8_t *cfis;
431	int slot;
432	int ncq;
433	int error;
434
435	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
436
437	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
438		/*
439		 * Try to cancel the outstanding blockif request.
440		 */
441		error = blockif_cancel(p->bctx, &aior->io_req);
442		if (error != 0)
443			continue;
444
445		slot = aior->slot;
446		cfis = aior->cfis;
447		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
448		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
449		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
450			ncq = 1;
451
452		if (ncq)
453			p->sact &= ~(1 << slot);
454		else
455			p->ci &= ~(1 << slot);
456
457		/*
458		 * This command is now done.
459		 */
460		p->pending &= ~(1 << slot);
461
462		/*
463		 * Delete the blockif request from the busy list
464		 */
465		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
466
467		/*
468		 * Move the blockif request back to the free list
469		 */
470		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
471	}
472
473	ahci_check_stopped(p);
474}
475
476static void
477ahci_port_reset(struct ahci_port *pr)
478{
479	pr->serr = 0;
480	pr->sact = 0;
481	pr->xfermode = ATA_UDMA6;
482	pr->mult_sectors = 128;
483
484	if (!pr->bctx) {
485		pr->ssts = ATA_SS_DET_NO_DEVICE;
486		pr->sig = 0xFFFFFFFF;
487		pr->tfd = 0x7F;
488		return;
489	}
490	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
491	if (pr->sctl & ATA_SC_SPD_MASK)
492		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
493	else
494		pr->ssts |= ATA_SS_SPD_GEN3;
495	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
496	if (!pr->atapi) {
497		pr->sig = PxSIG_ATA;
498		pr->tfd |= ATA_S_READY;
499	} else
500		pr->sig = PxSIG_ATAPI;
501	ahci_write_reset_fis_d2h(pr);
502}
503
504static void
505ahci_reset(struct pci_ahci_softc *sc)
506{
507	int i;
508
509	sc->ghc = AHCI_GHC_AE;
510	sc->is = 0;
511
512	if (sc->lintr) {
513		pci_lintr_deassert(sc->asc_pi);
514		sc->lintr = 0;
515	}
516
517	for (i = 0; i < sc->ports; i++) {
518		sc->port[i].ie = 0;
519		sc->port[i].is = 0;
520		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
521		if (sc->port[i].bctx)
522			sc->port[i].cmd |= AHCI_P_CMD_CPS;
523		sc->port[i].sctl = 0;
524		ahci_port_reset(&sc->port[i]);
525	}
526}
527
/*
 * Copy 'src' into a 'len'-byte field with the two bytes of every pair
 * swapped (byte i lands at index i ^ 1), padding with spaces once the
 * source string ends.  No terminating NUL is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	uint8_t ch;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
540
/*
 * Copy 'src' into a 'len'-byte field in natural byte order, padding with
 * spaces once the source string ends.  No terminating NUL is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
553
/*
 * Build up the iovec based on the PRDT, 'done' and 'len'.
 *
 * The iovec of aior->io_req is filled from the guest's PRDT so that it
 * covers the bytes of the command between aior->done and aior->len, up
 * to BLOCKIF_IOV_MAX entries.  On return br_iovcnt and br_resid describe
 * the I/O to issue, aior->done has advanced by that amount and
 * aior->more tells whether another request is needed afterwards.
 *
 * p:     port the command belongs to.
 * aior:  request to fill in; 'done' and 'len' must already be set.
 * prdt:  first PRDT entry of the command.
 * prdtl: number of PRDT entries.
 */
static void
ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
    struct ahci_prdt_entry *prdt, uint16_t prdtl)
{
	struct blockif_req *breq = &aior->io_req;
	int i, j, skip, todo, left, extra;
	uint32_t dbcsz;

	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
	skip = aior->done;
	left = aior->len - aior->done;
	todo = 0;
	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
	    i++, prdt++) {
		dbcsz = (prdt->dbc & DBCMASK) + 1;
		/* Skip already done part of the PRDT */
		if (dbcsz <= skip) {
			skip -= dbcsz;
			continue;
		}
		/* Entry is partly done: start 'skip' bytes into it. */
		dbcsz -= skip;
		/* Never map more than what remains of the command. */
		if (dbcsz > left)
			dbcsz = left;
		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
		    prdt->dba + skip, dbcsz);
		breq->br_iov[j].iov_len = dbcsz;
		todo += dbcsz;
		left -= dbcsz;
		skip = 0;
		j++;
	}

	/* If we got limited by IOV length, round I/O down to sector size. */
	if (j == BLOCKIF_IOV_MAX) {
		extra = todo % blockif_sectsz(p->bctx);
		todo -= extra;
		assert(todo > 0);
		/* Trim 'extra' bytes off the tail, dropping whole entries. */
		while (extra > 0) {
			if (breq->br_iov[j - 1].iov_len > extra) {
				breq->br_iov[j - 1].iov_len -= extra;
				break;
			}
			extra -= breq->br_iov[j - 1].iov_len;
			j--;
		}
	}

	breq->br_iovcnt = j;
	breq->br_resid = todo;
	aior->done += todo;
	/* More to do only if bytes remain and PRDT entries are left. */
	aior->more = (aior->done < aior->len && i < prdtl);
}
609
610static void
611ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
612{
613	struct ahci_ioreq *aior;
614	struct blockif_req *breq;
615	struct ahci_prdt_entry *prdt;
616	struct ahci_cmd_hdr *hdr;
617	uint64_t lba;
618	uint32_t len;
619	int err, first, ncq, readop;
620
621	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
622	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
623	ncq = 0;
624	readop = 1;
625	first = (done == 0);
626
627	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
628	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
629	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
630	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
631		readop = 0;
632
633	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
634	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
635		lba = ((uint64_t)cfis[10] << 40) |
636			((uint64_t)cfis[9] << 32) |
637			((uint64_t)cfis[8] << 24) |
638			((uint64_t)cfis[6] << 16) |
639			((uint64_t)cfis[5] << 8) |
640			cfis[4];
641		len = cfis[11] << 8 | cfis[3];
642		if (!len)
643			len = 65536;
644		ncq = 1;
645	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
646	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
647	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
648		lba = ((uint64_t)cfis[10] << 40) |
649			((uint64_t)cfis[9] << 32) |
650			((uint64_t)cfis[8] << 24) |
651			((uint64_t)cfis[6] << 16) |
652			((uint64_t)cfis[5] << 8) |
653			cfis[4];
654		len = cfis[13] << 8 | cfis[12];
655		if (!len)
656			len = 65536;
657	} else {
658		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
659			(cfis[5] << 8) | cfis[4];
660		len = cfis[12];
661		if (!len)
662			len = 256;
663	}
664	lba *= blockif_sectsz(p->bctx);
665	len *= blockif_sectsz(p->bctx);
666
667	/* Pull request off free list */
668	aior = STAILQ_FIRST(&p->iofhd);
669	assert(aior != NULL);
670	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
671
672	aior->cfis = cfis;
673	aior->slot = slot;
674	aior->len = len;
675	aior->done = done;
676	breq = &aior->io_req;
677	breq->br_offset = lba + done;
678	ahci_build_iov(p, aior, prdt, hdr->prdtl);
679
680	/* Mark this command in-flight. */
681	p->pending |= 1 << slot;
682
683	/* Stuff request onto busy list. */
684	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
685
686	if (ncq && first)
687		ahci_write_fis_d2h_ncq(p, slot);
688
689	if (readop)
690		err = blockif_read(p->bctx, breq);
691	else
692		err = blockif_write(p->bctx, breq);
693	assert(err == 0);
694}
695
696static void
697ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
698{
699	struct ahci_ioreq *aior;
700	struct blockif_req *breq;
701	int err;
702
703	/*
704	 * Pull request off free list
705	 */
706	aior = STAILQ_FIRST(&p->iofhd);
707	assert(aior != NULL);
708	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
709	aior->cfis = cfis;
710	aior->slot = slot;
711	aior->len = 0;
712	aior->done = 0;
713	aior->more = 0;
714	breq = &aior->io_req;
715
716	/*
717	 * Mark this command in-flight.
718	 */
719	p->pending |= 1 << slot;
720
721	/*
722	 * Stuff request onto busy list
723	 */
724	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
725
726	err = blockif_flush(p->bctx, breq);
727	assert(err == 0);
728}
729
730static inline void
731read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
732		void *buf, int size)
733{
734	struct ahci_cmd_hdr *hdr;
735	struct ahci_prdt_entry *prdt;
736	void *to;
737	int i, len;
738
739	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
740	len = size;
741	to = buf;
742	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
743	for (i = 0; i < hdr->prdtl && len; i++) {
744		uint8_t *ptr;
745		uint32_t dbcsz;
746		int sublen;
747
748		dbcsz = (prdt->dbc & DBCMASK) + 1;
749		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
750		sublen = len < dbcsz ? len : dbcsz;
751		memcpy(to, ptr, sublen);
752		len -= sublen;
753		to += sublen;
754		prdt++;
755	}
756}
757
758static void
759ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
760{
761	struct ahci_ioreq *aior;
762	struct blockif_req *breq;
763	uint8_t *entry;
764	uint64_t elba;
765	uint32_t len, elen;
766	int err, first, ncq;
767	uint8_t buf[512];
768
769	first = (done == 0);
770	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
771		len = (uint16_t)cfis[13] << 8 | cfis[12];
772		len *= 512;
773		ncq = 0;
774	} else { /* ATA_SEND_FPDMA_QUEUED */
775		len = (uint16_t)cfis[11] << 8 | cfis[3];
776		len *= 512;
777		ncq = 1;
778	}
779	read_prdt(p, slot, cfis, buf, sizeof(buf));
780
781next:
782	entry = &buf[done];
783	elba = ((uint64_t)entry[5] << 40) |
784		((uint64_t)entry[4] << 32) |
785		((uint64_t)entry[3] << 24) |
786		((uint64_t)entry[2] << 16) |
787		((uint64_t)entry[1] << 8) |
788		entry[0];
789	elen = (uint16_t)entry[7] << 8 | entry[6];
790	done += 8;
791	if (elen == 0) {
792		if (done >= len) {
793			if (ncq) {
794				if (first)
795					ahci_write_fis_d2h_ncq(p, slot);
796				ahci_write_fis_sdb(p, slot, cfis,
797				    ATA_S_READY | ATA_S_DSC);
798			} else {
799				ahci_write_fis_d2h(p, slot, cfis,
800				    ATA_S_READY | ATA_S_DSC);
801			}
802			p->pending &= ~(1 << slot);
803			ahci_check_stopped(p);
804			if (!first)
805				ahci_handle_port(p);
806			return;
807		}
808		goto next;
809	}
810
811	/*
812	 * Pull request off free list
813	 */
814	aior = STAILQ_FIRST(&p->iofhd);
815	assert(aior != NULL);
816	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
817	aior->cfis = cfis;
818	aior->slot = slot;
819	aior->len = len;
820	aior->done = done;
821	aior->more = (len != done);
822
823	breq = &aior->io_req;
824	breq->br_offset = elba * blockif_sectsz(p->bctx);
825	breq->br_resid = elen * blockif_sectsz(p->bctx);
826
827	/*
828	 * Mark this command in-flight.
829	 */
830	p->pending |= 1 << slot;
831
832	/*
833	 * Stuff request onto busy list
834	 */
835	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
836
837	if (ncq && first)
838		ahci_write_fis_d2h_ncq(p, slot);
839
840	err = blockif_delete(p->bctx, breq);
841	assert(err == 0);
842}
843
844static inline void
845write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
846		void *buf, int size)
847{
848	struct ahci_cmd_hdr *hdr;
849	struct ahci_prdt_entry *prdt;
850	void *from;
851	int i, len;
852
853	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
854	len = size;
855	from = buf;
856	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
857	for (i = 0; i < hdr->prdtl && len; i++) {
858		uint8_t *ptr;
859		uint32_t dbcsz;
860		int sublen;
861
862		dbcsz = (prdt->dbc & DBCMASK) + 1;
863		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
864		sublen = len < dbcsz ? len : dbcsz;
865		memcpy(ptr, from, sublen);
866		len -= sublen;
867		from += sublen;
868		prdt++;
869	}
870	hdr->prdbc = size - len;
871}
872
/*
 * Store a checksum in the last byte of 'buf' so that all 'size' bytes
 * sum to zero modulo 256.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	uint8_t total;
	int idx, last;

	last = size - 1;
	total = 0;
	idx = 0;
	while (idx < last)
		total += buf[idx++];
	buf[last] = 0x100 - total;
}
883
884static void
885ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
886{
887	struct ahci_cmd_hdr *hdr;
888	uint8_t buf[512];
889
890	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
891	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
892	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
893		ahci_write_fis_d2h(p, slot, cfis,
894		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
895		return;
896	}
897
898	memset(buf, 0, sizeof(buf));
899	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
900	ahci_checksum(buf, sizeof(buf));
901
902	if (cfis[2] == ATA_READ_LOG_EXT)
903		ahci_write_fis_piosetup(p);
904	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
905	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
906}
907
/*
 * Answer an identify request for an ATA disk.
 *
 * The command is aborted on an ATAPI port or when it has no PRDT entry.
 * Otherwise a 256-word identify block is built from the backend's
 * properties (size, sector sizes, geometry, read-only and delete
 * support) and the port's current settings, checksummed, copied to the
 * guest through the PRDT and completed with a PIO setup FIS followed by
 * a D2H FIS.
 */
static void
handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
{
	struct ahci_cmd_hdr *hdr;

	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
	if (p->atapi || hdr->prdtl == 0) {
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
	} else {
		uint16_t buf[256];
		uint64_t sectors;
		int sectsz, psectsz, psectoff, candelete, ro;
		uint16_t cyl;
		uint8_t sech, heads;

		ro = blockif_is_ro(p->bctx);
		candelete = blockif_candelete(p->bctx);
		sectsz = blockif_sectsz(p->bctx);
		sectors = blockif_size(p->bctx) / sectsz;
		blockif_chs(p->bctx, &cyl, &heads, &sech);
		blockif_psectsz(p->bctx, &psectsz, &psectoff);
		memset(buf, 0, sizeof(buf));
		buf[0] = 0x0040;
		/* Geometry as reported by the backend. */
		buf[1] = cyl;
		buf[3] = heads;
		buf[6] = sech;
		/* Identification strings, space padded by ata_string(). */
		ata_string((uint8_t *)(buf+10), p->ident, 20);
		ata_string((uint8_t *)(buf+23), "001", 8);
		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
		buf[47] = (0x8000 | 128);
		buf[48] = 0;
		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
		buf[50] = (1 << 14);
		buf[53] = (1 << 1 | 1 << 2);
		if (p->mult_sectors)
			buf[59] = (0x100 | p->mult_sectors);
		/* Words 60-61: sector count, saturated at 0x0fffffff. */
		if (sectors <= 0x0fffffff) {
			buf[60] = sectors;
			buf[61] = (sectors >> 16);
		} else {
			buf[60] = 0xffff;
			buf[61] = 0x0fff;
		}
		/* Word 63: one extra bit flags the current WDMA mode. */
		buf[63] = 0x7;
		if (p->xfermode & ATA_WDMA0)
			buf[63] |= (1 << ((p->xfermode & 7) + 8));
		buf[64] = 0x3;
		buf[65] = 120;
		buf[66] = 120;
		buf[67] = 120;
		buf[68] = 120;
		buf[69] = 0;
		buf[75] = 31;
		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
			   ATA_SUPPORT_NCQ);
		/* Word 77 includes the speed field taken from ssts. */
		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
		buf[80] = 0x3f0;
		buf[81] = 0x28;
		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
		buf[84] = (1 << 14);
		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
		buf[87] = (1 << 14);
		/* Word 88: one extra bit flags the current UDMA mode. */
		buf[88] = 0x7f;
		if (p->xfermode & ATA_UDMA0)
			buf[88] |= (1 << ((p->xfermode & 7) + 8));
		/* Words 100-103: full 64-bit sector count. */
		buf[100] = sectors;
		buf[101] = (sectors >> 16);
		buf[102] = (sectors >> 32);
		buf[103] = (sectors >> 48);
		/* Trim is offered only on writable backends that can delete. */
		if (candelete && !ro) {
			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
			buf[105] = 1;
			buf[169] = ATA_SUPPORT_DSM_TRIM;
		}
		buf[106] = 0x4000;
		buf[209] = 0x4000;
		/* Physical sector larger than logical: report ratio and offset. */
		if (psectsz > sectsz) {
			buf[106] |= 0x2000;
			buf[106] |= ffsl(psectsz / sectsz) - 1;
			buf[209] |= (psectoff / sectsz);
		}
		/* Logical sector above 512 bytes: report its size in words. */
		if (sectsz > 512) {
			buf[106] |= 0x1000;
			buf[117] = sectsz / 2;
			buf[118] = ((sectsz / 2) >> 16);
		}
		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
		buf[222] = 0x1020;
		buf[255] = 0x00a5;
		/* The checksum overwrites the last byte of the block. */
		ahci_checksum((uint8_t *)buf, sizeof(buf));
		ahci_write_fis_piosetup(p);
		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
	}
}
1012
/*
 * Answer an identify request for an ATAPI device.
 *
 * The command is aborted on a non-ATAPI port.  Otherwise a 256-word
 * identify block is built, checksummed, copied to the guest through the
 * PRDT and completed with a PIO setup FIS followed by a D2H FIS.
 */
static void
handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
{
	if (!p->atapi) {
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
	} else {
		uint16_t buf[256];

		memset(buf, 0, sizeof(buf));
		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
		/* Identification strings, space padded by ata_string(). */
		ata_string((uint8_t *)(buf+10), p->ident, 20);
		ata_string((uint8_t *)(buf+23), "001", 8);
		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
		buf[49] = (1 << 9 | 1 << 8);
		buf[50] = (1 << 14 | 1);
		buf[53] = (1 << 2 | 1 << 1);
		buf[62] = 0x3f;
		/* Word 63: one extra bit flags the current WDMA mode. */
		buf[63] = 7;
		if (p->xfermode & ATA_WDMA0)
			buf[63] |= (1 << ((p->xfermode & 7) + 8));
		buf[64] = 3;
		buf[65] = 120;
		buf[66] = 120;
		buf[67] = 120;
		buf[68] = 120;
		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
		/* Word 77 carries the speed field taken from ssts. */
		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
		buf[78] = (1 << 5);
		buf[80] = 0x3f0;
		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
		buf[83] = (1 << 14);
		buf[84] = (1 << 14);
		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
		buf[87] = (1 << 14);
		/* Word 88: one extra bit flags the current UDMA mode. */
		buf[88] = 0x7f;
		if (p->xfermode & ATA_UDMA0)
			buf[88] |= (1 << ((p->xfermode & 7) + 8));
		buf[222] = 0x1020;
		buf[255] = 0x00a5;
		/* The checksum overwrites the last byte of the block. */
		ahci_checksum((uint8_t *)buf, sizeof(buf));
		ahci_write_fis_piosetup(p);
		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
	}
}
1061
1062static void
1063atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1064{
1065	uint8_t buf[36];
1066	uint8_t *acmd;
1067	int len;
1068	uint32_t tfd;
1069
1070	acmd = cfis + 0x40;
1071
1072	if (acmd[1] & 1) {		/* VPD */
1073		if (acmd[2] == 0) {	/* Supported VPD pages */
1074			buf[0] = 0x05;
1075			buf[1] = 0;
1076			buf[2] = 0;
1077			buf[3] = 1;
1078			buf[4] = 0;
1079			len = 4 + buf[3];
1080		} else {
1081			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1082			p->asc = 0x24;
1083			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1084			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1085			ahci_write_fis_d2h(p, slot, cfis, tfd);
1086			return;
1087		}
1088	} else {
1089		buf[0] = 0x05;
1090		buf[1] = 0x80;
1091		buf[2] = 0x00;
1092		buf[3] = 0x21;
1093		buf[4] = 31;
1094		buf[5] = 0;
1095		buf[6] = 0;
1096		buf[7] = 0;
1097		atapi_string(buf + 8, "BHYVE", 8);
1098		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1099		atapi_string(buf + 32, "001", 4);
1100		len = sizeof(buf);
1101	}
1102
1103	if (len > acmd[4])
1104		len = acmd[4];
1105	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1106	write_prdt(p, slot, cfis, buf, len);
1107	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1108}
1109
1110static void
1111atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1112{
1113	uint8_t buf[8];
1114	uint64_t sectors;
1115
1116	sectors = blockif_size(p->bctx) / 2048;
1117	be32enc(buf, sectors - 1);
1118	be32enc(buf + 4, 2048);
1119	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1120	write_prdt(p, slot, cfis, buf, sizeof(buf));
1121	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1122}
1123
/*
 * Handle the READ TOC packet command.
 *
 * The packet starts 0x40 bytes into the command FIS.  The reply format
 * is taken from the top two bits of packet byte 9 and the maximum reply
 * length from the big-endian 16-bit value at packet bytes 7..8.  Formats
 * 0, 1 and 2 are answered for a disc with a single track; any other
 * format fails with an illegal-request sense key.  Addresses are given
 * as minute/second/frame when bit 1 of packet byte 1 is set, otherwise
 * as 32-bit big-endian block numbers.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	int len;

	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		/* Track list: track 1 (if requested) plus lead-out 0xaa. */
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, buf[20], *bp;

		msf = (acmd[1] >> 1) & 1;
		start_track = acmd[6];
		/* Only tracks 0, 1 and the lead-out can be a start track. */
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* Bytes 0..1 hold the length, filled in once it is known. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		if (start_track <= 1) {
			/* Descriptor for track 1, starting at block 0. */
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* Descriptor for the lead-out, at the end of the medium. */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/*
		 * NOTE(review): presumably converts 512-byte sectors into
		 * 2048-byte blocks - confirm the backend sector size.
		 */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		/* Fixed 12-byte reply, zero apart from bytes 1..3. */
		uint8_t buf[12];

		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		/* Four 11-byte descriptors: 0xa0, 0xa1, 0xa2 and track 1. */
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;
		/* start_track is read but not used in this format. */
		start_track = acmd[6];
		/* Bytes 0..1 hold the length, filled in once it is known. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* Descriptor 0xa0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Descriptor 0xa1. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Descriptor 0xa2: address of the end of the medium. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/*
		 * NOTE(review): presumably converts 512-byte sectors into
		 * 2048-byte blocks - confirm the backend sector size.
		 */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* Descriptor for track 1, starting at block 0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		/* Unsupported format: fail with illegal request. */
		uint32_t tfd;

		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
1304
1305static void
1306atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1307{
1308	uint8_t buf[16];
1309
1310	memset(buf, 0, sizeof(buf));
1311	buf[3] = 8;
1312
1313	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1314	write_prdt(p, slot, cfis, buf, sizeof(buf));
1315	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1316}
1317
1318static void
1319atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1320{
1321	struct ahci_ioreq *aior;
1322	struct ahci_cmd_hdr *hdr;
1323	struct ahci_prdt_entry *prdt;
1324	struct blockif_req *breq;
1325	struct pci_ahci_softc *sc;
1326	uint8_t *acmd;
1327	uint64_t lba;
1328	uint32_t len;
1329	int err;
1330
1331	sc = p->pr_sc;
1332	acmd = cfis + 0x40;
1333	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1334	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1335
1336	lba = be32dec(acmd + 2);
1337	if (acmd[0] == READ_10)
1338		len = be16dec(acmd + 7);
1339	else
1340		len = be32dec(acmd + 6);
1341	if (len == 0) {
1342		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1343		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1344	}
1345	lba *= 2048;
1346	len *= 2048;
1347
1348	/*
1349	 * Pull request off free list
1350	 */
1351	aior = STAILQ_FIRST(&p->iofhd);
1352	assert(aior != NULL);
1353	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1354	aior->cfis = cfis;
1355	aior->slot = slot;
1356	aior->len = len;
1357	aior->done = done;
1358	breq = &aior->io_req;
1359	breq->br_offset = lba + done;
1360	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1361
1362	/* Mark this command in-flight. */
1363	p->pending |= 1 << slot;
1364
1365	/* Stuff request onto busy list. */
1366	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1367
1368	err = blockif_read(p->bctx, breq);
1369	assert(err == 0);
1370}
1371
1372static void
1373atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1374{
1375	uint8_t buf[64];
1376	uint8_t *acmd;
1377	int len;
1378
1379	acmd = cfis + 0x40;
1380	len = acmd[4];
1381	if (len > sizeof(buf))
1382		len = sizeof(buf);
1383	memset(buf, 0, len);
1384	buf[0] = 0x70 | (1 << 7);
1385	buf[2] = p->sense_key;
1386	buf[7] = 10;
1387	buf[12] = p->asc;
1388	write_prdt(p, slot, cfis, buf, len);
1389	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1390	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1391}
1392
1393static void
1394atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1395{
1396	uint8_t *acmd = cfis + 0x40;
1397	uint32_t tfd;
1398
1399	switch (acmd[4] & 3) {
1400	case 0:
1401	case 1:
1402	case 3:
1403		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1404		tfd = ATA_S_READY | ATA_S_DSC;
1405		break;
1406	case 2:
1407		/* TODO eject media */
1408		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1409		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1410		p->asc = 0x53;
1411		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1412		break;
1413	}
1414	ahci_write_fis_d2h(p, slot, cfis, tfd);
1415}
1416
1417static void
1418atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1419{
1420	uint8_t *acmd;
1421	uint32_t tfd;
1422	uint8_t pc, code;
1423	int len;
1424
1425	acmd = cfis + 0x40;
1426	len = be16dec(acmd + 7);
1427	pc = acmd[2] >> 6;
1428	code = acmd[2] & 0x3f;
1429
1430	switch (pc) {
1431	case 0:
1432		switch (code) {
1433		case MODEPAGE_RW_ERROR_RECOVERY:
1434		{
1435			uint8_t buf[16];
1436
1437			if (len > sizeof(buf))
1438				len = sizeof(buf);
1439
1440			memset(buf, 0, sizeof(buf));
1441			be16enc(buf, 16 - 2);
1442			buf[2] = 0x70;
1443			buf[8] = 0x01;
1444			buf[9] = 16 - 10;
1445			buf[11] = 0x05;
1446			write_prdt(p, slot, cfis, buf, len);
1447			tfd = ATA_S_READY | ATA_S_DSC;
1448			break;
1449		}
1450		case MODEPAGE_CD_CAPABILITIES:
1451		{
1452			uint8_t buf[30];
1453
1454			if (len > sizeof(buf))
1455				len = sizeof(buf);
1456
1457			memset(buf, 0, sizeof(buf));
1458			be16enc(buf, 30 - 2);
1459			buf[2] = 0x70;
1460			buf[8] = 0x2A;
1461			buf[9] = 30 - 10;
1462			buf[10] = 0x08;
1463			buf[12] = 0x71;
1464			be16enc(&buf[18], 2);
1465			be16enc(&buf[20], 512);
1466			write_prdt(p, slot, cfis, buf, len);
1467			tfd = ATA_S_READY | ATA_S_DSC;
1468			break;
1469		}
1470		default:
1471			goto error;
1472			break;
1473		}
1474		break;
1475	case 3:
1476		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1477		p->asc = 0x39;
1478		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1479		break;
1480error:
1481	case 1:
1482	case 2:
1483		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1484		p->asc = 0x24;
1485		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1486		break;
1487	}
1488	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1489	ahci_write_fis_d2h(p, slot, cfis, tfd);
1490}
1491
1492static void
1493atapi_get_event_status_notification(struct ahci_port *p, int slot,
1494    uint8_t *cfis)
1495{
1496	uint8_t *acmd;
1497	uint32_t tfd;
1498
1499	acmd = cfis + 0x40;
1500
1501	/* we don't support asynchronous operation */
1502	if (!(acmd[1] & 1)) {
1503		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1504		p->asc = 0x24;
1505		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1506	} else {
1507		uint8_t buf[8];
1508		int len;
1509
1510		len = be16dec(acmd + 7);
1511		if (len > sizeof(buf))
1512			len = sizeof(buf);
1513
1514		memset(buf, 0, sizeof(buf));
1515		be16enc(buf, 8 - 2);
1516		buf[2] = 0x04;
1517		buf[3] = 0x10;
1518		buf[5] = 0x02;
1519		write_prdt(p, slot, cfis, buf, len);
1520		tfd = ATA_S_READY | ATA_S_DSC;
1521	}
1522	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1523	ahci_write_fis_d2h(p, slot, cfis, tfd);
1524}
1525
1526static void
1527handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1528{
1529	uint8_t *acmd;
1530
1531	acmd = cfis + 0x40;
1532
1533#ifdef AHCI_DEBUG
1534	{
1535		int i;
1536		DPRINTF("ACMD:");
1537		for (i = 0; i < 16; i++)
1538			DPRINTF("%02x ", acmd[i]);
1539		DPRINTF("\n");
1540	}
1541#endif
1542
1543	switch (acmd[0]) {
1544	case TEST_UNIT_READY:
1545		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1546		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1547		break;
1548	case INQUIRY:
1549		atapi_inquiry(p, slot, cfis);
1550		break;
1551	case READ_CAPACITY:
1552		atapi_read_capacity(p, slot, cfis);
1553		break;
1554	case PREVENT_ALLOW:
1555		/* TODO */
1556		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1557		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1558		break;
1559	case READ_TOC:
1560		atapi_read_toc(p, slot, cfis);
1561		break;
1562	case REPORT_LUNS:
1563		atapi_report_luns(p, slot, cfis);
1564		break;
1565	case READ_10:
1566	case READ_12:
1567		atapi_read(p, slot, cfis, 0);
1568		break;
1569	case REQUEST_SENSE:
1570		atapi_request_sense(p, slot, cfis);
1571		break;
1572	case START_STOP_UNIT:
1573		atapi_start_stop_unit(p, slot, cfis);
1574		break;
1575	case MODE_SENSE_10:
1576		atapi_mode_sense(p, slot, cfis);
1577		break;
1578	case GET_EVENT_STATUS_NOTIFICATION:
1579		atapi_get_event_status_notification(p, slot, cfis);
1580		break;
1581	default:
1582		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1583		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1584		p->asc = 0x20;
1585		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1586				ATA_S_READY | ATA_S_ERROR);
1587		break;
1588	}
1589}
1590
1591static void
1592ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1593{
1594
1595	p->tfd |= ATA_S_BUSY;
1596	switch (cfis[2]) {
1597	case ATA_ATA_IDENTIFY:
1598		handle_identify(p, slot, cfis);
1599		break;
1600	case ATA_SETFEATURES:
1601	{
1602		switch (cfis[3]) {
1603		case ATA_SF_ENAB_SATA_SF:
1604			switch (cfis[12]) {
1605			case ATA_SATA_SF_AN:
1606				p->tfd = ATA_S_DSC | ATA_S_READY;
1607				break;
1608			default:
1609				p->tfd = ATA_S_ERROR | ATA_S_READY;
1610				p->tfd |= (ATA_ERROR_ABORT << 8);
1611				break;
1612			}
1613			break;
1614		case ATA_SF_ENAB_WCACHE:
1615		case ATA_SF_DIS_WCACHE:
1616		case ATA_SF_ENAB_RCACHE:
1617		case ATA_SF_DIS_RCACHE:
1618			p->tfd = ATA_S_DSC | ATA_S_READY;
1619			break;
1620		case ATA_SF_SETXFER:
1621		{
1622			switch (cfis[12] & 0xf8) {
1623			case ATA_PIO:
1624			case ATA_PIO0:
1625				break;
1626			case ATA_WDMA0:
1627			case ATA_UDMA0:
1628				p->xfermode = (cfis[12] & 0x7);
1629				break;
1630			}
1631			p->tfd = ATA_S_DSC | ATA_S_READY;
1632			break;
1633		}
1634		default:
1635			p->tfd = ATA_S_ERROR | ATA_S_READY;
1636			p->tfd |= (ATA_ERROR_ABORT << 8);
1637			break;
1638		}
1639		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1640		break;
1641	}
1642	case ATA_SET_MULTI:
1643		if (cfis[12] != 0 &&
1644			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1645			p->tfd = ATA_S_ERROR | ATA_S_READY;
1646			p->tfd |= (ATA_ERROR_ABORT << 8);
1647		} else {
1648			p->mult_sectors = cfis[12];
1649			p->tfd = ATA_S_DSC | ATA_S_READY;
1650		}
1651		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1652		break;
1653	case ATA_READ:
1654	case ATA_WRITE:
1655	case ATA_READ48:
1656	case ATA_WRITE48:
1657	case ATA_READ_MUL:
1658	case ATA_WRITE_MUL:
1659	case ATA_READ_MUL48:
1660	case ATA_WRITE_MUL48:
1661	case ATA_READ_DMA:
1662	case ATA_WRITE_DMA:
1663	case ATA_READ_DMA48:
1664	case ATA_WRITE_DMA48:
1665	case ATA_READ_FPDMA_QUEUED:
1666	case ATA_WRITE_FPDMA_QUEUED:
1667		ahci_handle_rw(p, slot, cfis, 0);
1668		break;
1669	case ATA_FLUSHCACHE:
1670	case ATA_FLUSHCACHE48:
1671		ahci_handle_flush(p, slot, cfis);
1672		break;
1673	case ATA_DATA_SET_MANAGEMENT:
1674		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1675		    cfis[13] == 0 && cfis[12] == 1) {
1676			ahci_handle_dsm_trim(p, slot, cfis, 0);
1677			break;
1678		}
1679		ahci_write_fis_d2h(p, slot, cfis,
1680		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1681		break;
1682	case ATA_SEND_FPDMA_QUEUED:
1683		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1684		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1685		    cfis[11] == 0 && cfis[3] == 1) {
1686			ahci_handle_dsm_trim(p, slot, cfis, 0);
1687			break;
1688		}
1689		ahci_write_fis_d2h(p, slot, cfis,
1690		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1691		break;
1692	case ATA_READ_LOG_EXT:
1693	case ATA_READ_LOG_DMA_EXT:
1694		ahci_handle_read_log(p, slot, cfis);
1695		break;
1696	case ATA_SECURITY_FREEZE_LOCK:
1697	case ATA_SMART_CMD:
1698	case ATA_NOP:
1699		ahci_write_fis_d2h(p, slot, cfis,
1700		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1701		break;
1702	case ATA_CHECK_POWER_MODE:
1703		cfis[12] = 0xff;	/* always on */
1704		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1705		break;
1706	case ATA_STANDBY_CMD:
1707	case ATA_STANDBY_IMMEDIATE:
1708	case ATA_IDLE_CMD:
1709	case ATA_IDLE_IMMEDIATE:
1710	case ATA_SLEEP:
1711	case ATA_READ_VERIFY:
1712	case ATA_READ_VERIFY48:
1713		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1714		break;
1715	case ATA_ATAPI_IDENTIFY:
1716		handle_atapi_identify(p, slot, cfis);
1717		break;
1718	case ATA_PACKET_CMD:
1719		if (!p->atapi) {
1720			ahci_write_fis_d2h(p, slot, cfis,
1721			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1722		} else
1723			handle_packet_cmd(p, slot, cfis);
1724		break;
1725	default:
1726		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1727		ahci_write_fis_d2h(p, slot, cfis,
1728		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1729		break;
1730	}
1731}
1732
1733static void
1734ahci_handle_slot(struct ahci_port *p, int slot)
1735{
1736	struct ahci_cmd_hdr *hdr;
1737#ifdef AHCI_DEBUG
1738	struct ahci_prdt_entry *prdt;
1739#endif
1740	struct pci_ahci_softc *sc;
1741	uint8_t *cfis;
1742#ifdef AHCI_DEBUG
1743	int cfl;
1744#endif
1745
1746	sc = p->pr_sc;
1747	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1748#ifdef AHCI_DEBUG
1749	cfl = (hdr->flags & 0x1f) * 4;
1750#endif
1751	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1752			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1753#ifdef AHCI_DEBUG
1754	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1755
1756	DPRINTF("\ncfis:");
1757	for (i = 0; i < cfl; i++) {
1758		if (i % 10 == 0)
1759			DPRINTF("\n");
1760		DPRINTF("%02x ", cfis[i]);
1761	}
1762	DPRINTF("\n");
1763
1764	for (i = 0; i < hdr->prdtl; i++) {
1765		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1766		prdt++;
1767	}
1768#endif
1769
1770	if (cfis[0] != FIS_TYPE_REGH2D) {
1771		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1772		return;
1773	}
1774
1775	if (cfis[1] & 0x80) {
1776		ahci_handle_cmd(p, slot, cfis);
1777	} else {
1778		if (cfis[15] & (1 << 2))
1779			p->reset = 1;
1780		else if (p->reset) {
1781			p->reset = 0;
1782			ahci_port_reset(p);
1783		}
1784		p->ci &= ~(1 << slot);
1785	}
1786}
1787
1788static void
1789ahci_handle_port(struct ahci_port *p)
1790{
1791
1792	if (!(p->cmd & AHCI_P_CMD_ST))
1793		return;
1794
1795	/*
1796	 * Search for any new commands to issue ignoring those that
1797	 * are already in-flight.  Stop if device is busy or in error.
1798	 */
1799	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1800		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1801			break;
1802		if (p->waitforclear)
1803			break;
1804		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1805			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1806			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1807			ahci_handle_slot(p, p->ccs);
1808		}
1809	}
1810}
1811
1812/*
1813 * blockif callback routine - this runs in the context of the blockif
1814 * i/o thread, so the mutex needs to be acquired.
1815 */
1816static void
1817ata_ioreq_cb(struct blockif_req *br, int err)
1818{
1819	struct ahci_cmd_hdr *hdr;
1820	struct ahci_ioreq *aior;
1821	struct ahci_port *p;
1822	struct pci_ahci_softc *sc;
1823	uint32_t tfd;
1824	uint8_t *cfis;
1825	int slot, ncq, dsm;
1826
1827	DPRINTF("%s %d\n", __func__, err);
1828
1829	ncq = dsm = 0;
1830	aior = br->br_param;
1831	p = aior->io_pr;
1832	cfis = aior->cfis;
1833	slot = aior->slot;
1834	sc = p->pr_sc;
1835	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1836
1837	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1838	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1839	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1840		ncq = 1;
1841	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1842	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1843	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1844		dsm = 1;
1845
1846	pthread_mutex_lock(&sc->mtx);
1847
1848	/*
1849	 * Delete the blockif request from the busy list
1850	 */
1851	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1852
1853	/*
1854	 * Move the blockif request back to the free list
1855	 */
1856	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1857
1858	if (!err)
1859		hdr->prdbc = aior->done;
1860
1861	if (!err && aior->more) {
1862		if (dsm)
1863			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1864		else
1865			ahci_handle_rw(p, slot, cfis, aior->done);
1866		goto out;
1867	}
1868
1869	if (!err)
1870		tfd = ATA_S_READY | ATA_S_DSC;
1871	else
1872		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1873	if (ncq)
1874		ahci_write_fis_sdb(p, slot, cfis, tfd);
1875	else
1876		ahci_write_fis_d2h(p, slot, cfis, tfd);
1877
1878	/*
1879	 * This command is now complete.
1880	 */
1881	p->pending &= ~(1 << slot);
1882
1883	ahci_check_stopped(p);
1884	ahci_handle_port(p);
1885out:
1886	pthread_mutex_unlock(&sc->mtx);
1887	DPRINTF("%s exit\n", __func__);
1888}
1889
1890static void
1891atapi_ioreq_cb(struct blockif_req *br, int err)
1892{
1893	struct ahci_cmd_hdr *hdr;
1894	struct ahci_ioreq *aior;
1895	struct ahci_port *p;
1896	struct pci_ahci_softc *sc;
1897	uint8_t *cfis;
1898	uint32_t tfd;
1899	int slot;
1900
1901	DPRINTF("%s %d\n", __func__, err);
1902
1903	aior = br->br_param;
1904	p = aior->io_pr;
1905	cfis = aior->cfis;
1906	slot = aior->slot;
1907	sc = p->pr_sc;
1908	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1909
1910	pthread_mutex_lock(&sc->mtx);
1911
1912	/*
1913	 * Delete the blockif request from the busy list
1914	 */
1915	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1916
1917	/*
1918	 * Move the blockif request back to the free list
1919	 */
1920	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1921
1922	if (!err)
1923		hdr->prdbc = aior->done;
1924
1925	if (!err && aior->more) {
1926		atapi_read(p, slot, cfis, aior->done);
1927		goto out;
1928	}
1929
1930	if (!err) {
1931		tfd = ATA_S_READY | ATA_S_DSC;
1932	} else {
1933		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1934		p->asc = 0x21;
1935		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1936	}
1937	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1938	ahci_write_fis_d2h(p, slot, cfis, tfd);
1939
1940	/*
1941	 * This command is now complete.
1942	 */
1943	p->pending &= ~(1 << slot);
1944
1945	ahci_check_stopped(p);
1946	ahci_handle_port(p);
1947out:
1948	pthread_mutex_unlock(&sc->mtx);
1949	DPRINTF("%s exit\n", __func__);
1950}
1951
1952static void
1953pci_ahci_ioreq_init(struct ahci_port *pr)
1954{
1955	struct ahci_ioreq *vr;
1956	int i;
1957
1958	pr->ioqsz = blockif_queuesz(pr->bctx);
1959	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1960	STAILQ_INIT(&pr->iofhd);
1961
1962	/*
1963	 * Add all i/o request entries to the free queue
1964	 */
1965	for (i = 0; i < pr->ioqsz; i++) {
1966		vr = &pr->ioreq[i];
1967		vr->io_pr = pr;
1968		if (!pr->atapi)
1969			vr->io_req.br_callback = ata_ioreq_cb;
1970		else
1971			vr->io_req.br_callback = atapi_ioreq_cb;
1972		vr->io_req.br_param = vr;
1973		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1974	}
1975
1976	TAILQ_INIT(&pr->iobhd);
1977}
1978
1979static void
1980pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1981{
1982	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1983	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1984	struct ahci_port *p = &sc->port[port];
1985
1986	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1987		port, offset, value);
1988
1989	switch (offset) {
1990	case AHCI_P_CLB:
1991		p->clb = value;
1992		break;
1993	case AHCI_P_CLBU:
1994		p->clbu = value;
1995		break;
1996	case AHCI_P_FB:
1997		p->fb = value;
1998		break;
1999	case AHCI_P_FBU:
2000		p->fbu = value;
2001		break;
2002	case AHCI_P_IS:
2003		p->is &= ~value;
2004		break;
2005	case AHCI_P_IE:
2006		p->ie = value & 0xFDC000FF;
2007		ahci_generate_intr(sc);
2008		break;
2009	case AHCI_P_CMD:
2010	{
2011		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2012		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2013		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2014		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2015		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2016		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2017		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2018		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2019
2020		if (!(value & AHCI_P_CMD_ST)) {
2021			ahci_port_stop(p);
2022		} else {
2023			uint64_t clb;
2024
2025			p->cmd |= AHCI_P_CMD_CR;
2026			clb = (uint64_t)p->clbu << 32 | p->clb;
2027			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2028					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2029		}
2030
2031		if (value & AHCI_P_CMD_FRE) {
2032			uint64_t fb;
2033
2034			p->cmd |= AHCI_P_CMD_FR;
2035			fb = (uint64_t)p->fbu << 32 | p->fb;
2036			/* we don't support FBSCP, so rfis size is 256Bytes */
2037			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2038		} else {
2039			p->cmd &= ~AHCI_P_CMD_FR;
2040		}
2041
2042		if (value & AHCI_P_CMD_CLO) {
2043			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2044			p->cmd &= ~AHCI_P_CMD_CLO;
2045		}
2046
2047		if (value & AHCI_P_CMD_ICC_MASK) {
2048			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2049		}
2050
2051		ahci_handle_port(p);
2052		break;
2053	}
2054	case AHCI_P_TFD:
2055	case AHCI_P_SIG:
2056	case AHCI_P_SSTS:
2057		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
2058		break;
2059	case AHCI_P_SCTL:
2060		p->sctl = value;
2061		if (!(p->cmd & AHCI_P_CMD_ST)) {
2062			if (value & ATA_SC_DET_RESET)
2063				ahci_port_reset(p);
2064		}
2065		break;
2066	case AHCI_P_SERR:
2067		p->serr &= ~value;
2068		break;
2069	case AHCI_P_SACT:
2070		p->sact |= value;
2071		break;
2072	case AHCI_P_CI:
2073		p->ci |= value;
2074		ahci_handle_port(p);
2075		break;
2076	case AHCI_P_SNTF:
2077	case AHCI_P_FBS:
2078	default:
2079		break;
2080	}
2081}
2082
2083static void
2084pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2085{
2086	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2087		offset, value);
2088
2089	switch (offset) {
2090	case AHCI_CAP:
2091	case AHCI_PI:
2092	case AHCI_VS:
2093	case AHCI_CAP2:
2094		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2095		break;
2096	case AHCI_GHC:
2097		if (value & AHCI_GHC_HR)
2098			ahci_reset(sc);
2099		else if (value & AHCI_GHC_IE) {
2100			sc->ghc |= AHCI_GHC_IE;
2101			ahci_generate_intr(sc);
2102		}
2103		break;
2104	case AHCI_IS:
2105		sc->is &= ~value;
2106		ahci_generate_intr(sc);
2107		break;
2108	default:
2109		break;
2110	}
2111}
2112
2113static void
2114pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2115		int baridx, uint64_t offset, int size, uint64_t value)
2116{
2117	struct pci_ahci_softc *sc = pi->pi_arg;
2118
2119	assert(baridx == 5);
2120	assert((offset % 4) == 0 && size == 4);
2121
2122	pthread_mutex_lock(&sc->mtx);
2123
2124	if (offset < AHCI_OFFSET)
2125		pci_ahci_host_write(sc, offset, value);
2126	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2127		pci_ahci_port_write(sc, offset, value);
2128	else
2129		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2130
2131	pthread_mutex_unlock(&sc->mtx);
2132}
2133
2134static uint64_t
2135pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2136{
2137	uint32_t value;
2138
2139	switch (offset) {
2140	case AHCI_CAP:
2141	case AHCI_GHC:
2142	case AHCI_IS:
2143	case AHCI_PI:
2144	case AHCI_VS:
2145	case AHCI_CCCC:
2146	case AHCI_CCCP:
2147	case AHCI_EM_LOC:
2148	case AHCI_EM_CTL:
2149	case AHCI_CAP2:
2150	{
2151		uint32_t *p = &sc->cap;
2152		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2153		value = *p;
2154		break;
2155	}
2156	default:
2157		value = 0;
2158		break;
2159	}
2160	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2161		offset, value);
2162
2163	return (value);
2164}
2165
2166static uint64_t
2167pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2168{
2169	uint32_t value;
2170	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2171	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2172
2173	switch (offset) {
2174	case AHCI_P_CLB:
2175	case AHCI_P_CLBU:
2176	case AHCI_P_FB:
2177	case AHCI_P_FBU:
2178	case AHCI_P_IS:
2179	case AHCI_P_IE:
2180	case AHCI_P_CMD:
2181	case AHCI_P_TFD:
2182	case AHCI_P_SIG:
2183	case AHCI_P_SSTS:
2184	case AHCI_P_SCTL:
2185	case AHCI_P_SERR:
2186	case AHCI_P_SACT:
2187	case AHCI_P_CI:
2188	case AHCI_P_SNTF:
2189	case AHCI_P_FBS:
2190	{
2191		uint32_t *p= &sc->port[port].clb;
2192		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2193		value = *p;
2194		break;
2195	}
2196	default:
2197		value = 0;
2198		break;
2199	}
2200
2201	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2202		port, offset, value);
2203
2204	return value;
2205}
2206
2207static uint64_t
2208pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2209    uint64_t regoff, int size)
2210{
2211	struct pci_ahci_softc *sc = pi->pi_arg;
2212	uint64_t offset;
2213	uint32_t value;
2214
2215	assert(baridx == 5);
2216	assert(size == 1 || size == 2 || size == 4);
2217	assert((regoff & (size - 1)) == 0);
2218
2219	pthread_mutex_lock(&sc->mtx);
2220
2221	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2222	if (offset < AHCI_OFFSET)
2223		value = pci_ahci_host_read(sc, offset);
2224	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2225		value = pci_ahci_port_read(sc, offset);
2226	else {
2227		value = 0;
2228		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
2229		    regoff);
2230	}
2231	value >>= 8 * (regoff & 0x3);
2232
2233	pthread_mutex_unlock(&sc->mtx);
2234
2235	return (value);
2236}
2237
2238static int
2239pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2240{
2241	char bident[sizeof("XX:XX:XX")];
2242	struct blockif_ctxt *bctxt;
2243	struct pci_ahci_softc *sc;
2244	int ret, slots, p;
2245	MD5_CTX mdctx;
2246	u_char digest[16];
2247	char *next, *next2;
2248
2249	ret = 0;
2250
2251#ifdef AHCI_DEBUG
2252	dbg = fopen("/tmp/log", "w+");
2253#endif
2254
2255	sc = calloc(1, sizeof(struct pci_ahci_softc));
2256	pi->pi_arg = sc;
2257	sc->asc_pi = pi;
2258	pthread_mutex_init(&sc->mtx, NULL);
2259	sc->ports = 0;
2260	sc->pi = 0;
2261	slots = 32;
2262
2263	for (p = 0; p < MAX_PORTS && opts != NULL; p++, opts = next) {
2264		/* Identify and cut off type of present port. */
2265		if (strncmp(opts, "hd:", 3) == 0) {
2266			atapi = 0;
2267			opts += 3;
2268		} else if (strncmp(opts, "cd:", 3) == 0) {
2269			atapi = 1;
2270			opts += 3;
2271		}
2272
2273		/* Find and cut off the next port options. */
2274		next = strstr(opts, ",hd:");
2275		next2 = strstr(opts, ",cd:");
2276		if (next == NULL || (next2 != NULL && next2 < next))
2277			next = next2;
2278		if (next != NULL) {
2279			next[0] = 0;
2280			next++;
2281		}
2282
2283		if (opts[0] == 0)
2284			continue;
2285
2286		/*
2287		 * Attempt to open the backing image. Use the PCI slot/func
2288		 * and the port number for the identifier string.
2289		 */
2290		snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
2291		    pi->pi_func, p);
2292		bctxt = blockif_open(opts, bident);
2293		if (bctxt == NULL) {
2294			sc->ports = p;
2295			ret = 1;
2296			goto open_fail;
2297		}
2298		sc->port[p].bctx = bctxt;
2299		sc->port[p].pr_sc = sc;
2300		sc->port[p].atapi = atapi;
2301
2302		/*
2303		 * Create an identifier for the backing file.
2304		 * Use parts of the md5 sum of the filename
2305		 */
2306		MD5Init(&mdctx);
2307		MD5Update(&mdctx, opts, strlen(opts));
2308		MD5Final(digest, &mdctx);
2309		sprintf(sc->port[p].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2310		    digest[0], digest[1], digest[2], digest[3], digest[4],
2311		    digest[5]);
2312
2313		/*
2314		 * Allocate blockif request structures and add them
2315		 * to the free list
2316		 */
2317		pci_ahci_ioreq_init(&sc->port[p]);
2318
2319		sc->pi |= (1 << p);
2320		if (sc->port[p].ioqsz < slots)
2321			slots = sc->port[p].ioqsz;
2322	}
2323	sc->ports = p;
2324
2325	/* Intel ICH8 AHCI */
2326	--slots;
2327	if (sc->ports < DEF_PORTS)
2328		sc->ports = DEF_PORTS;
2329	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2330	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2331	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2332	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2333	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2334
2335	sc->vs = 0x10300;
2336	sc->cap2 = AHCI_CAP2_APST;
2337	ahci_reset(sc);
2338
2339	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2340	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2341	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2342	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2343	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2344	pci_emul_add_msicap(pi, 1);
2345	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2346	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2347
2348	pci_lintr_request(pi);
2349
2350open_fail:
2351	if (ret) {
2352		for (p = 0; p < sc->ports; p++) {
2353			if (sc->port[p].bctx != NULL)
2354				blockif_close(sc->port[p].bctx);
2355		}
2356		free(sc);
2357	}
2358
2359	return (ret);
2360}
2361
/*
 * Init hook of the "ahci" and "ahci-hd" emulations: ports without an
 * explicit "hd:"/"cd:" prefix start out as disks (atapi = 0).
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi_default = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi_default));
}
2368
/*
 * Init hook of the "ahci-cd" emulation: ports without an explicit
 * "hd:"/"cd:" prefix start out as ATAPI devices (atapi = 1).
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi_default = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi_default));
}
2375
2376/*
2377 * Use separate emulation names to distinguish drive and atapi devices
2378 */
2379struct pci_devemu pci_de_ahci = {
2380	.pe_emu =	"ahci",
2381	.pe_init =	pci_ahci_hd_init,
2382	.pe_barwrite =	pci_ahci_write,
2383	.pe_barread =	pci_ahci_read
2384};
2385PCI_EMUL_SET(pci_de_ahci);
2386
2387struct pci_devemu pci_de_ahci_hd = {
2388	.pe_emu =	"ahci-hd",
2389	.pe_init =	pci_ahci_hd_init,
2390	.pe_barwrite =	pci_ahci_write,
2391	.pe_barread =	pci_ahci_read
2392};
2393PCI_EMUL_SET(pci_de_ahci_hd);
2394
2395struct pci_devemu pci_de_ahci_cd = {
2396	.pe_emu =	"ahci-cd",
2397	.pe_init =	pci_ahci_atapi_init,
2398	.pe_barwrite =	pci_ahci_write,
2399	.pe_barread =	pci_ahci_read
2400};
2401PCI_EMUL_SET(pci_de_ahci_cd);
2402