1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <inttypes.h>
52
53#include "bhyverun.h"
54#include "pci_emul.h"
55#include "ahci.h"
56#include "block_if.h"
57
58#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
59
60#define	PxSIG_ATA	0x00000101 /* ATA drive */
61#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
62
/*
 * SATA FIS type codes.  The value is stored in byte 0 of every FIS built
 * in this file and is checked against byte 0 of the command FIS supplied
 * by the guest.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
73
74/*
75 * SCSI opcodes
76 */
77#define	TEST_UNIT_READY		0x00
78#define	REQUEST_SENSE		0x03
79#define	INQUIRY			0x12
80#define	START_STOP_UNIT		0x1B
81#define	PREVENT_ALLOW		0x1E
82#define	READ_CAPACITY		0x25
83#define	READ_10			0x28
84#define	POSITION_TO_ELEMENT	0x2B
85#define	READ_TOC		0x43
86#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
87#define	MODE_SENSE_10		0x5A
88#define	READ_12			0xA8
89#define	READ_CD			0xBE
90
91/*
92 * SCSI mode page codes
93 */
94#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
95#define	MODEPAGE_CD_CAPABILITIES	0x2A
96
97/*
98 * ATA commands
99 */
100#define	ATA_SF_ENAB_SATA_SF		0x10
101#define		ATA_SATA_SF_AN		0x05
102#define	ATA_SF_DIS_SATA_SF		0x90
103
104/*
105 * Debug printf
106 */
107#ifdef AHCI_DEBUG
108static FILE *dbg;
109#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
110#else
111#define DPRINTF(format, arg...)
112#endif
113#define WPRINTF(format, arg...) printf(format, ##arg)
114
/*
 * Per-command I/O request.  Requests are taken from a port's free list
 * (ahci_port.iofhd) and handed to the block interface.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to blockif_read/write/flush */
	struct ahci_port *io_pr;	/* presumably the owning port - TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_list; /* linkage on the port's free list */
	uint8_t *cfis;			/* command FIS this request was built from */
	uint32_t len;			/* total transfer length in bytes */
	uint32_t done;			/* bytes covered by the iovecs built so far */
	int slot;			/* command slot number */
	int prdtl;			/* PRDT entries left over after this request */
};
125
/*
 * State of one emulated AHCI port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing block device; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* host pointer to the command list */
	uint8_t *rfis;			/* host pointer to the received-FIS area */
	int atapi;			/* non-zero if the device is ATAPI */
	int reset;			/* set while a software reset is in progress */
	int mult_sectors;		/* value set by ATA_SET_MULTI */
	uint8_t xfermode;		/* current transfer mode */
	uint8_t sense_key;		/* ATAPI sense key of the last error */
	uint8_t asc;			/* ATAPI additional sense code of the last error */
	uint32_t pending;		/* bitmap of slots with I/O still in flight */

	/*
	 * Port register values (names presumably follow the AHCI PxCLB,
	 * PxCLBU, ... register set - TODO confirm against the register
	 * access code).
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free request list */
};
164
/*
 * Command header: one entry of the guest's command list, indexed by slot.
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* low 5 bits: command FIS length in 32-bit words */
	uint16_t prdtl;		/* number of PRDT entries in the command table */
	uint32_t prdbc;		/* byte count transferred, written back by write_prdt() */
	uint64_t ctba;		/* guest physical address of the command table */
	uint32_t reserved[4];
};
172
/*
 * Physical region descriptor: one guest buffer of a scatter/gather list.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
179
/*
 * Per-controller state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance of the controller */
	pthread_mutex_t	mtx;
	int ports;			/* number of entries of port[] in use */
	/* Controller-level register values. */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;			/* one bit per port with an enabled event pending */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the pin-based interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
198#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
199
/*
 * Convert a logical block address into a minute/second/frame triple.
 * The address is biased by 150 frames and then split at 75 frames per
 * second and 60 seconds per minute; buf[0], buf[1] and buf[2] receive the
 * minute, second and frame values.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
207
208/*
209 * generate HBA intr depending on whether or not ports within
210 * the controller have an interrupt pending.
211 */
212static void
213ahci_generate_intr(struct pci_ahci_softc *sc)
214{
215	struct pci_devinst *pi;
216	int i;
217
218	pi = sc->asc_pi;
219
220	for (i = 0; i < sc->ports; i++) {
221		struct ahci_port *pr;
222		pr = &sc->port[i];
223		if (pr->is & pr->ie)
224			sc->is |= (1 << i);
225	}
226
227	DPRINTF("%s %x\n", __func__, sc->is);
228
229	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
230		if (pci_msi_enabled(pi)) {
231			/*
232			 * Generate an MSI interrupt on every edge
233			 */
234			pci_generate_msi(pi, 0);
235		} else if (!sc->lintr) {
236			/*
237			 * Only generate a pin-based interrupt if one wasn't
238			 * in progress
239			 */
240			sc->lintr = 1;
241			pci_lintr_assert(pi);
242		}
243	} else if (sc->lintr) {
244		/*
245		 * No interrupts: deassert pin-based signal if it had
246		 * been asserted
247		 */
248		pci_lintr_deassert(pi);
249		sc->lintr = 0;
250	}
251}
252
253static void
254ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
255{
256	int offset, len, irq;
257
258	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
259		return;
260
261	switch (ft) {
262	case FIS_TYPE_REGD2H:
263		offset = 0x40;
264		len = 20;
265		irq = AHCI_P_IX_DHR;
266		break;
267	case FIS_TYPE_SETDEVBITS:
268		offset = 0x58;
269		len = 8;
270		irq = AHCI_P_IX_SDB;
271		break;
272	case FIS_TYPE_PIOSETUP:
273		offset = 0x20;
274		len = 20;
275		irq = 0;
276		break;
277	default:
278		WPRINTF("unsupported fis type %d\n", ft);
279		return;
280	}
281	memcpy(p->rfis + offset, fis, len);
282	if (irq) {
283		p->is |= irq;
284		ahci_generate_intr(p->pr_sc);
285	}
286}
287
288static void
289ahci_write_fis_piosetup(struct ahci_port *p)
290{
291	uint8_t fis[20];
292
293	memset(fis, 0, sizeof(fis));
294	fis[0] = FIS_TYPE_PIOSETUP;
295	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
296}
297
298static void
299ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
300{
301	uint8_t fis[8];
302	uint8_t error;
303
304	error = (tfd >> 8) & 0xff;
305	memset(fis, 0, sizeof(fis));
306	fis[0] = error;
307	fis[2] = tfd & 0x77;
308	*(uint32_t *)(fis + 4) = (1 << slot);
309	if (fis[2] & ATA_S_ERROR)
310		p->is |= AHCI_P_IX_TFE;
311	p->tfd = tfd;
312	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
313}
314
315static void
316ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
317{
318	uint8_t fis[20];
319	uint8_t error;
320
321	error = (tfd >> 8) & 0xff;
322	memset(fis, 0, sizeof(fis));
323	fis[0] = FIS_TYPE_REGD2H;
324	fis[1] = (1 << 6);
325	fis[2] = tfd & 0xff;
326	fis[3] = error;
327	fis[4] = cfis[4];
328	fis[5] = cfis[5];
329	fis[6] = cfis[6];
330	fis[7] = cfis[7];
331	fis[8] = cfis[8];
332	fis[9] = cfis[9];
333	fis[10] = cfis[10];
334	fis[11] = cfis[11];
335	fis[12] = cfis[12];
336	fis[13] = cfis[13];
337	if (fis[2] & ATA_S_ERROR)
338		p->is |= AHCI_P_IX_TFE;
339	else
340		p->ci &= ~(1 << slot);
341	p->tfd = tfd;
342	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
343}
344
345static void
346ahci_write_reset_fis_d2h(struct ahci_port *p)
347{
348	uint8_t fis[20];
349
350	memset(fis, 0, sizeof(fis));
351	fis[0] = FIS_TYPE_REGD2H;
352	fis[3] = 1;
353	fis[4] = 1;
354	if (p->atapi) {
355		fis[5] = 0x14;
356		fis[6] = 0xeb;
357	}
358	fis[12] = 1;
359	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
360}
361
362static void
363ahci_port_reset(struct ahci_port *pr)
364{
365	pr->sctl = 0;
366	pr->serr = 0;
367	pr->sact = 0;
368	pr->xfermode = ATA_UDMA6;
369	pr->mult_sectors = 128;
370
371	if (!pr->bctx) {
372		pr->ssts = ATA_SS_DET_NO_DEVICE;
373		pr->sig = 0xFFFFFFFF;
374		pr->tfd = 0x7F;
375		return;
376	}
377	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
378		ATA_SS_IPM_ACTIVE;
379	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
380	if (!pr->atapi) {
381		pr->sig = PxSIG_ATA;
382		pr->tfd |= ATA_S_READY;
383	} else
384		pr->sig = PxSIG_ATAPI;
385	ahci_write_reset_fis_d2h(pr);
386}
387
388static void
389ahci_reset(struct pci_ahci_softc *sc)
390{
391	int i;
392
393	sc->ghc = AHCI_GHC_AE;
394	sc->is = 0;
395
396	if (sc->lintr) {
397		pci_lintr_deassert(sc->asc_pi);
398		sc->lintr = 0;
399	}
400
401	for (i = 0; i < sc->ports; i++) {
402		sc->port[i].ie = 0;
403		sc->port[i].is = 0;
404		ahci_port_reset(&sc->port[i]);
405	}
406}
407
/*
 * Copy src into a fixed-width ATA string field of len bytes, padding
 * with spaces once src is exhausted.  Each pair of bytes is stored
 * swapped (index i goes to i ^ 1).
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	uint8_t ch;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
420
/*
 * Copy src into a fixed-width field of len bytes in natural byte order,
 * padding with spaces once src is exhausted.  No terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + (len > 0 ? len : 0);
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
433
434static void
435ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
436    int seek)
437{
438	struct ahci_ioreq *aior;
439	struct blockif_req *breq;
440	struct pci_ahci_softc *sc;
441	struct ahci_prdt_entry *prdt;
442	struct ahci_cmd_hdr *hdr;
443	uint64_t lba;
444	uint32_t len;
445	int i, err, iovcnt, ncq, readop;
446
447	sc = p->pr_sc;
448	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
449	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
450	ncq = 0;
451	readop = 1;
452
453	prdt += seek;
454	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
455			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
456		readop = 0;
457
458	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
459			cfis[2] == ATA_READ_FPDMA_QUEUED) {
460		lba = ((uint64_t)cfis[10] << 40) |
461			((uint64_t)cfis[9] << 32) |
462			((uint64_t)cfis[8] << 24) |
463			((uint64_t)cfis[6] << 16) |
464			((uint64_t)cfis[5] << 8) |
465			cfis[4];
466		len = cfis[11] << 8 | cfis[3];
467		if (!len)
468			len = 65536;
469		ncq = 1;
470	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
471		lba = ((uint64_t)cfis[10] << 40) |
472			((uint64_t)cfis[9] << 32) |
473			((uint64_t)cfis[8] << 24) |
474			((uint64_t)cfis[6] << 16) |
475			((uint64_t)cfis[5] << 8) |
476			cfis[4];
477		len = cfis[13] << 8 | cfis[12];
478		if (!len)
479			len = 65536;
480	} else {
481		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
482			(cfis[5] << 8) | cfis[4];
483		len = cfis[12];
484		if (!len)
485			len = 256;
486	}
487	lba *= blockif_sectsz(p->bctx);
488	len *= blockif_sectsz(p->bctx);
489
490	/*
491	 * Pull request off free list
492	 */
493	aior = STAILQ_FIRST(&p->iofhd);
494	assert(aior != NULL);
495	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
496	aior->cfis = cfis;
497	aior->slot = slot;
498	aior->len = len;
499	aior->done = done;
500	breq = &aior->io_req;
501	breq->br_offset = lba + done;
502	iovcnt = hdr->prdtl - seek;
503	if (iovcnt > BLOCKIF_IOV_MAX) {
504		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
505		iovcnt = BLOCKIF_IOV_MAX;
506		/*
507		 * Mark this command in-flight.
508		 */
509		p->pending |= 1 << slot;
510	} else
511		aior->prdtl = 0;
512	breq->br_iovcnt = iovcnt;
513
514	/*
515	 * Build up the iovec based on the prdt
516	 */
517	for (i = 0; i < iovcnt; i++) {
518		uint32_t dbcsz;
519
520		dbcsz = (prdt->dbc & DBCMASK) + 1;
521		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
522		    prdt->dba, dbcsz);
523		breq->br_iov[i].iov_len = dbcsz;
524		aior->done += dbcsz;
525		prdt++;
526	}
527	if (readop)
528		err = blockif_read(p->bctx, breq);
529	else
530		err = blockif_write(p->bctx, breq);
531	assert(err == 0);
532
533	if (ncq)
534		p->ci &= ~(1 << slot);
535}
536
537static void
538ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
539{
540	struct ahci_ioreq *aior;
541	struct blockif_req *breq;
542	int err;
543
544	/*
545	 * Pull request off free list
546	 */
547	aior = STAILQ_FIRST(&p->iofhd);
548	assert(aior != NULL);
549	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
550	aior->cfis = cfis;
551	aior->slot = slot;
552	aior->len = 0;
553	aior->done = 0;
554	aior->prdtl = 0;
555	breq = &aior->io_req;
556
557	err = blockif_flush(p->bctx, breq);
558	assert(err == 0);
559}
560
561static inline void
562write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
563		void *buf, int size)
564{
565	struct ahci_cmd_hdr *hdr;
566	struct ahci_prdt_entry *prdt;
567	void *from;
568	int i, len;
569
570	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
571	len = size;
572	from = buf;
573	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
574	for (i = 0; i < hdr->prdtl && len; i++) {
575		uint8_t *ptr;
576		uint32_t dbcsz;
577		int sublen;
578
579		dbcsz = (prdt->dbc & DBCMASK) + 1;
580		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
581		sublen = len < dbcsz ? len : dbcsz;
582		memcpy(ptr, from, sublen);
583		len -= sublen;
584		from += sublen;
585		prdt++;
586	}
587	hdr->prdbc = size - len;
588}
589
590static void
591handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
592{
593	struct ahci_cmd_hdr *hdr;
594
595	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
596	if (p->atapi || hdr->prdtl == 0) {
597		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
598		p->is |= AHCI_P_IX_TFE;
599	} else {
600		uint16_t buf[256];
601		uint64_t sectors;
602		uint16_t cyl;
603		uint8_t sech, heads;
604
605		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
606		blockif_chs(p->bctx, &cyl, &heads, &sech);
607		memset(buf, 0, sizeof(buf));
608		buf[0] = 0x0040;
609		buf[1] = cyl;
610		buf[3] = heads;
611		buf[6] = sech;
612		/* TODO emulate different serial? */
613		ata_string((uint8_t *)(buf+10), "123456", 20);
614		ata_string((uint8_t *)(buf+23), "001", 8);
615		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
616		buf[47] = (0x8000 | 128);
617		buf[48] = 0x1;
618		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
619		buf[50] = (1 << 14);
620		buf[53] = (1 << 1 | 1 << 2);
621		if (p->mult_sectors)
622			buf[59] = (0x100 | p->mult_sectors);
623		buf[60] = sectors;
624		buf[61] = (sectors >> 16);
625		buf[63] = 0x7;
626		if (p->xfermode & ATA_WDMA0)
627			buf[63] |= (1 << ((p->xfermode & 7) + 8));
628		buf[64] = 0x3;
629		buf[65] = 100;
630		buf[66] = 100;
631		buf[67] = 100;
632		buf[68] = 100;
633		buf[75] = 31;
634		buf[76] = (1 << 8 | 1 << 2);
635		buf[80] = 0x1f0;
636		buf[81] = 0x28;
637		buf[82] = (1 << 5 | 1 << 14);
638		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
639		buf[84] = (1 << 14);
640		buf[85] = (1 << 5 | 1 << 14);
641		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
642		buf[87] = (1 << 14);
643		buf[88] = 0x7f;
644		if (p->xfermode & ATA_UDMA0)
645			buf[88] |= (1 << ((p->xfermode & 7) + 8));
646		buf[93] = (1 | 1 <<14);
647		buf[100] = sectors;
648		buf[101] = (sectors >> 16);
649		buf[102] = (sectors >> 32);
650		buf[103] = (sectors >> 48);
651		ahci_write_fis_piosetup(p);
652		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
653		p->tfd = ATA_S_DSC | ATA_S_READY;
654		p->is |= AHCI_P_IX_DP;
655		p->ci &= ~(1 << slot);
656	}
657	ahci_generate_intr(p->pr_sc);
658}
659
660static void
661handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
662{
663	if (!p->atapi) {
664		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
665		p->is |= AHCI_P_IX_TFE;
666	} else {
667		uint16_t buf[256];
668
669		memset(buf, 0, sizeof(buf));
670		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
671		/* TODO emulate different serial? */
672		ata_string((uint8_t *)(buf+10), "123456", 20);
673		ata_string((uint8_t *)(buf+23), "001", 8);
674		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
675		buf[49] = (1 << 9 | 1 << 8);
676		buf[50] = (1 << 14 | 1);
677		buf[53] = (1 << 2 | 1 << 1);
678		buf[62] = 0x3f;
679		buf[63] = 7;
680		buf[64] = 3;
681		buf[65] = 100;
682		buf[66] = 100;
683		buf[67] = 100;
684		buf[68] = 100;
685		buf[76] = (1 << 2 | 1 << 1);
686		buf[78] = (1 << 5);
687		buf[80] = (0x1f << 4);
688		buf[82] = (1 << 4);
689		buf[83] = (1 << 14);
690		buf[84] = (1 << 14);
691		buf[85] = (1 << 4);
692		buf[87] = (1 << 14);
693		buf[88] = (1 << 14 | 0x7f);
694		ahci_write_fis_piosetup(p);
695		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
696		p->tfd = ATA_S_DSC | ATA_S_READY;
697		p->is |= AHCI_P_IX_DHR;
698		p->ci &= ~(1 << slot);
699	}
700	ahci_generate_intr(p->pr_sc);
701}
702
703static void
704atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
705{
706	uint8_t buf[36];
707	uint8_t *acmd;
708	int len;
709
710	acmd = cfis + 0x40;
711
712	buf[0] = 0x05;
713	buf[1] = 0x80;
714	buf[2] = 0x00;
715	buf[3] = 0x21;
716	buf[4] = 31;
717	buf[5] = 0;
718	buf[6] = 0;
719	buf[7] = 0;
720	atapi_string(buf + 8, "BHYVE", 8);
721	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
722	atapi_string(buf + 32, "001", 4);
723
724	len = sizeof(buf);
725	if (len > acmd[4])
726		len = acmd[4];
727	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
728	write_prdt(p, slot, cfis, buf, len);
729	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
730}
731
732static void
733atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
734{
735	uint8_t buf[8];
736	uint64_t sectors;
737
738	sectors = blockif_size(p->bctx) / 2048;
739	be32enc(buf, sectors - 1);
740	be32enc(buf + 4, 2048);
741	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
742	write_prdt(p, slot, cfis, buf, sizeof(buf));
743	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
744}
745
746static void
747atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
748{
749	uint8_t *acmd;
750	uint8_t format;
751	int len;
752
753	acmd = cfis + 0x40;
754
755	len = be16dec(acmd + 7);
756	format = acmd[9] >> 6;
757	switch (format) {
758	case 0:
759	{
760		int msf, size;
761		uint64_t sectors;
762		uint8_t start_track, buf[20], *bp;
763
764		msf = (acmd[1] >> 1) & 1;
765		start_track = acmd[6];
766		if (start_track > 1 && start_track != 0xaa) {
767			uint32_t tfd;
768			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
769			p->asc = 0x24;
770			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
771			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
772			ahci_write_fis_d2h(p, slot, cfis, tfd);
773			return;
774		}
775		bp = buf + 2;
776		*bp++ = 1;
777		*bp++ = 1;
778		if (start_track <= 1) {
779			*bp++ = 0;
780			*bp++ = 0x14;
781			*bp++ = 1;
782			*bp++ = 0;
783			if (msf) {
784				*bp++ = 0;
785				lba_to_msf(bp, 0);
786				bp += 3;
787			} else {
788				*bp++ = 0;
789				*bp++ = 0;
790				*bp++ = 0;
791				*bp++ = 0;
792			}
793		}
794		*bp++ = 0;
795		*bp++ = 0x14;
796		*bp++ = 0xaa;
797		*bp++ = 0;
798		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
799		sectors >>= 2;
800		if (msf) {
801			*bp++ = 0;
802			lba_to_msf(bp, sectors);
803			bp += 3;
804		} else {
805			be32enc(bp, sectors);
806			bp += 4;
807		}
808		size = bp - buf;
809		be16enc(buf, size - 2);
810		if (len > size)
811			len = size;
812		write_prdt(p, slot, cfis, buf, len);
813		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
814		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
815		break;
816	}
817	case 1:
818	{
819		uint8_t buf[12];
820
821		memset(buf, 0, sizeof(buf));
822		buf[1] = 0xa;
823		buf[2] = 0x1;
824		buf[3] = 0x1;
825		if (len > sizeof(buf))
826			len = sizeof(buf);
827		write_prdt(p, slot, cfis, buf, len);
828		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
829		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
830		break;
831	}
832	case 2:
833	{
834		int msf, size;
835		uint64_t sectors;
836		uint8_t start_track, *bp, buf[50];
837
838		msf = (acmd[1] >> 1) & 1;
839		start_track = acmd[6];
840		bp = buf + 2;
841		*bp++ = 1;
842		*bp++ = 1;
843
844		*bp++ = 1;
845		*bp++ = 0x14;
846		*bp++ = 0;
847		*bp++ = 0xa0;
848		*bp++ = 0;
849		*bp++ = 0;
850		*bp++ = 0;
851		*bp++ = 0;
852		*bp++ = 1;
853		*bp++ = 0;
854		*bp++ = 0;
855
856		*bp++ = 1;
857		*bp++ = 0x14;
858		*bp++ = 0;
859		*bp++ = 0xa1;
860		*bp++ = 0;
861		*bp++ = 0;
862		*bp++ = 0;
863		*bp++ = 0;
864		*bp++ = 1;
865		*bp++ = 0;
866		*bp++ = 0;
867
868		*bp++ = 1;
869		*bp++ = 0x14;
870		*bp++ = 0;
871		*bp++ = 0xa2;
872		*bp++ = 0;
873		*bp++ = 0;
874		*bp++ = 0;
875		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
876		sectors >>= 2;
877		if (msf) {
878			*bp++ = 0;
879			lba_to_msf(bp, sectors);
880			bp += 3;
881		} else {
882			be32enc(bp, sectors);
883			bp += 4;
884		}
885
886		*bp++ = 1;
887		*bp++ = 0x14;
888		*bp++ = 0;
889		*bp++ = 1;
890		*bp++ = 0;
891		*bp++ = 0;
892		*bp++ = 0;
893		if (msf) {
894			*bp++ = 0;
895			lba_to_msf(bp, 0);
896			bp += 3;
897		} else {
898			*bp++ = 0;
899			*bp++ = 0;
900			*bp++ = 0;
901			*bp++ = 0;
902		}
903
904		size = bp - buf;
905		be16enc(buf, size - 2);
906		if (len > size)
907			len = size;
908		write_prdt(p, slot, cfis, buf, len);
909		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
910		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
911		break;
912	}
913	default:
914	{
915		uint32_t tfd;
916
917		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
918		p->asc = 0x24;
919		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
920		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
921		ahci_write_fis_d2h(p, slot, cfis, tfd);
922		break;
923	}
924	}
925}
926
927static void
928atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
929		uint32_t done, int seek)
930{
931	struct ahci_ioreq *aior;
932	struct ahci_cmd_hdr *hdr;
933	struct ahci_prdt_entry *prdt;
934	struct blockif_req *breq;
935	struct pci_ahci_softc *sc;
936	uint8_t *acmd;
937	uint64_t lba;
938	uint32_t len;
939	int i, err, iovcnt;
940
941	sc = p->pr_sc;
942	acmd = cfis + 0x40;
943	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
944	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
945
946	prdt += seek;
947	lba = be32dec(acmd + 2);
948	if (acmd[0] == READ_10)
949		len = be16dec(acmd + 7);
950	else
951		len = be32dec(acmd + 6);
952	if (len == 0) {
953		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
954		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
955	}
956	lba *= 2048;
957	len *= 2048;
958
959	/*
960	 * Pull request off free list
961	 */
962	aior = STAILQ_FIRST(&p->iofhd);
963	assert(aior != NULL);
964	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
965	aior->cfis = cfis;
966	aior->slot = slot;
967	aior->len = len;
968	aior->done = done;
969	breq = &aior->io_req;
970	breq->br_offset = lba + done;
971	iovcnt = hdr->prdtl - seek;
972	if (iovcnt > BLOCKIF_IOV_MAX) {
973		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
974		iovcnt = BLOCKIF_IOV_MAX;
975	} else
976		aior->prdtl = 0;
977	breq->br_iovcnt = iovcnt;
978
979	/*
980	 * Build up the iovec based on the prdt
981	 */
982	for (i = 0; i < iovcnt; i++) {
983		uint32_t dbcsz;
984
985		dbcsz = (prdt->dbc & DBCMASK) + 1;
986		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
987		    prdt->dba, dbcsz);
988		breq->br_iov[i].iov_len = dbcsz;
989		aior->done += dbcsz;
990		prdt++;
991	}
992	err = blockif_read(p->bctx, breq);
993	assert(err == 0);
994}
995
996static void
997atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
998{
999	uint8_t buf[64];
1000	uint8_t *acmd;
1001	int len;
1002
1003	acmd = cfis + 0x40;
1004	len = acmd[4];
1005	if (len > sizeof(buf))
1006		len = sizeof(buf);
1007	memset(buf, 0, len);
1008	buf[0] = 0x70 | (1 << 7);
1009	buf[2] = p->sense_key;
1010	buf[7] = 10;
1011	buf[12] = p->asc;
1012	write_prdt(p, slot, cfis, buf, len);
1013	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1014	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1015}
1016
1017static void
1018atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1019{
1020	uint8_t *acmd = cfis + 0x40;
1021	uint32_t tfd;
1022
1023	switch (acmd[4] & 3) {
1024	case 0:
1025	case 1:
1026	case 3:
1027		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1028		tfd = ATA_S_READY | ATA_S_DSC;
1029		break;
1030	case 2:
1031		/* TODO eject media */
1032		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1033		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1034		p->asc = 0x53;
1035		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1036		break;
1037	}
1038	ahci_write_fis_d2h(p, slot, cfis, tfd);
1039}
1040
1041static void
1042atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1043{
1044	uint8_t *acmd;
1045	uint32_t tfd;
1046	uint8_t pc, code;
1047	int len;
1048
1049	acmd = cfis + 0x40;
1050	len = be16dec(acmd + 7);
1051	pc = acmd[2] >> 6;
1052	code = acmd[2] & 0x3f;
1053
1054	switch (pc) {
1055	case 0:
1056		switch (code) {
1057		case MODEPAGE_RW_ERROR_RECOVERY:
1058		{
1059			uint8_t buf[16];
1060
1061			if (len > sizeof(buf))
1062				len = sizeof(buf);
1063
1064			memset(buf, 0, sizeof(buf));
1065			be16enc(buf, 16 - 2);
1066			buf[2] = 0x70;
1067			buf[8] = 0x01;
1068			buf[9] = 16 - 10;
1069			buf[11] = 0x05;
1070			write_prdt(p, slot, cfis, buf, len);
1071			tfd = ATA_S_READY | ATA_S_DSC;
1072			break;
1073		}
1074		case MODEPAGE_CD_CAPABILITIES:
1075		{
1076			uint8_t buf[30];
1077
1078			if (len > sizeof(buf))
1079				len = sizeof(buf);
1080
1081			memset(buf, 0, sizeof(buf));
1082			be16enc(buf, 30 - 2);
1083			buf[2] = 0x70;
1084			buf[8] = 0x2A;
1085			buf[9] = 30 - 10;
1086			buf[10] = 0x08;
1087			buf[12] = 0x71;
1088			be16enc(&buf[18], 2);
1089			be16enc(&buf[20], 512);
1090			write_prdt(p, slot, cfis, buf, len);
1091			tfd = ATA_S_READY | ATA_S_DSC;
1092			break;
1093		}
1094		default:
1095			goto error;
1096			break;
1097		}
1098		break;
1099	case 3:
1100		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1101		p->asc = 0x39;
1102		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1103		break;
1104error:
1105	case 1:
1106	case 2:
1107		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1108		p->asc = 0x24;
1109		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1110		break;
1111	}
1112	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1113	ahci_write_fis_d2h(p, slot, cfis, tfd);
1114}
1115
1116static void
1117atapi_get_event_status_notification(struct ahci_port *p, int slot,
1118    uint8_t *cfis)
1119{
1120	uint8_t *acmd;
1121	uint32_t tfd;
1122
1123	acmd = cfis + 0x40;
1124
1125	/* we don't support asynchronous operation */
1126	if (!(acmd[1] & 1)) {
1127		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1128		p->asc = 0x24;
1129		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1130	} else {
1131		uint8_t buf[8];
1132		int len;
1133
1134		len = be16dec(acmd + 7);
1135		if (len > sizeof(buf))
1136			len = sizeof(buf);
1137
1138		memset(buf, 0, sizeof(buf));
1139		be16enc(buf, 8 - 2);
1140		buf[2] = 0x04;
1141		buf[3] = 0x10;
1142		buf[5] = 0x02;
1143		write_prdt(p, slot, cfis, buf, len);
1144		tfd = ATA_S_READY | ATA_S_DSC;
1145	}
1146	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1147	ahci_write_fis_d2h(p, slot, cfis, tfd);
1148}
1149
1150static void
1151handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1152{
1153	uint8_t *acmd;
1154
1155	acmd = cfis + 0x40;
1156
1157#ifdef AHCI_DEBUG
1158	{
1159		int i;
1160		DPRINTF("ACMD:");
1161		for (i = 0; i < 16; i++)
1162			DPRINTF("%02x ", acmd[i]);
1163		DPRINTF("\n");
1164	}
1165#endif
1166
1167	switch (acmd[0]) {
1168	case TEST_UNIT_READY:
1169		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1170		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1171		break;
1172	case INQUIRY:
1173		atapi_inquiry(p, slot, cfis);
1174		break;
1175	case READ_CAPACITY:
1176		atapi_read_capacity(p, slot, cfis);
1177		break;
1178	case PREVENT_ALLOW:
1179		/* TODO */
1180		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1181		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1182		break;
1183	case READ_TOC:
1184		atapi_read_toc(p, slot, cfis);
1185		break;
1186	case READ_10:
1187	case READ_12:
1188		atapi_read(p, slot, cfis, 0, 0);
1189		break;
1190	case REQUEST_SENSE:
1191		atapi_request_sense(p, slot, cfis);
1192		break;
1193	case START_STOP_UNIT:
1194		atapi_start_stop_unit(p, slot, cfis);
1195		break;
1196	case MODE_SENSE_10:
1197		atapi_mode_sense(p, slot, cfis);
1198		break;
1199	case GET_EVENT_STATUS_NOTIFICATION:
1200		atapi_get_event_status_notification(p, slot, cfis);
1201		break;
1202	default:
1203		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1204		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1205		p->asc = 0x20;
1206		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1207				ATA_S_READY | ATA_S_ERROR);
1208		break;
1209	}
1210}
1211
1212static void
1213ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1214{
1215
1216	switch (cfis[2]) {
1217	case ATA_ATA_IDENTIFY:
1218		handle_identify(p, slot, cfis);
1219		break;
1220	case ATA_SETFEATURES:
1221	{
1222		switch (cfis[3]) {
1223		case ATA_SF_ENAB_SATA_SF:
1224			switch (cfis[12]) {
1225			case ATA_SATA_SF_AN:
1226				p->tfd = ATA_S_DSC | ATA_S_READY;
1227				break;
1228			default:
1229				p->tfd = ATA_S_ERROR | ATA_S_READY;
1230				p->tfd |= (ATA_ERROR_ABORT << 8);
1231				break;
1232			}
1233			break;
1234		case ATA_SF_ENAB_WCACHE:
1235		case ATA_SF_DIS_WCACHE:
1236		case ATA_SF_ENAB_RCACHE:
1237		case ATA_SF_DIS_RCACHE:
1238			p->tfd = ATA_S_DSC | ATA_S_READY;
1239			break;
1240		case ATA_SF_SETXFER:
1241		{
1242			switch (cfis[12] & 0xf8) {
1243			case ATA_PIO:
1244			case ATA_PIO0:
1245				break;
1246			case ATA_WDMA0:
1247			case ATA_UDMA0:
1248				p->xfermode = (cfis[12] & 0x7);
1249				break;
1250			}
1251			p->tfd = ATA_S_DSC | ATA_S_READY;
1252			break;
1253		}
1254		default:
1255			p->tfd = ATA_S_ERROR | ATA_S_READY;
1256			p->tfd |= (ATA_ERROR_ABORT << 8);
1257			break;
1258		}
1259		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1260		break;
1261	}
1262	case ATA_SET_MULTI:
1263		if (cfis[12] != 0 &&
1264			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1265			p->tfd = ATA_S_ERROR | ATA_S_READY;
1266			p->tfd |= (ATA_ERROR_ABORT << 8);
1267		} else {
1268			p->mult_sectors = cfis[12];
1269			p->tfd = ATA_S_DSC | ATA_S_READY;
1270		}
1271		p->is |= AHCI_P_IX_DP;
1272		p->ci &= ~(1 << slot);
1273		ahci_generate_intr(p->pr_sc);
1274		break;
1275	case ATA_READ_DMA:
1276	case ATA_WRITE_DMA:
1277	case ATA_READ_DMA48:
1278	case ATA_WRITE_DMA48:
1279	case ATA_READ_FPDMA_QUEUED:
1280	case ATA_WRITE_FPDMA_QUEUED:
1281		ahci_handle_dma(p, slot, cfis, 0, 0);
1282		break;
1283	case ATA_FLUSHCACHE:
1284	case ATA_FLUSHCACHE48:
1285		ahci_handle_flush(p, slot, cfis);
1286		break;
1287	case ATA_STANDBY_CMD:
1288		break;
1289	case ATA_NOP:
1290	case ATA_STANDBY_IMMEDIATE:
1291	case ATA_IDLE_IMMEDIATE:
1292	case ATA_SLEEP:
1293		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1294		break;
1295	case ATA_ATAPI_IDENTIFY:
1296		handle_atapi_identify(p, slot, cfis);
1297		break;
1298	case ATA_PACKET_CMD:
1299		if (!p->atapi) {
1300			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1301			p->is |= AHCI_P_IX_TFE;
1302			ahci_generate_intr(p->pr_sc);
1303		} else
1304			handle_packet_cmd(p, slot, cfis);
1305		break;
1306	default:
1307		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1308		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1309		p->is |= AHCI_P_IX_TFE;
1310		ahci_generate_intr(p->pr_sc);
1311		break;
1312	}
1313}
1314
1315static void
1316ahci_handle_slot(struct ahci_port *p, int slot)
1317{
1318	struct ahci_cmd_hdr *hdr;
1319	struct ahci_prdt_entry *prdt;
1320	struct pci_ahci_softc *sc;
1321	uint8_t *cfis;
1322	int cfl;
1323
1324	sc = p->pr_sc;
1325	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1326	cfl = (hdr->flags & 0x1f) * 4;
1327	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1328			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1329	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1330
1331#ifdef AHCI_DEBUG
1332	DPRINTF("\ncfis:");
1333	for (i = 0; i < cfl; i++) {
1334		if (i % 10 == 0)
1335			DPRINTF("\n");
1336		DPRINTF("%02x ", cfis[i]);
1337	}
1338	DPRINTF("\n");
1339
1340	for (i = 0; i < hdr->prdtl; i++) {
1341		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1342		prdt++;
1343	}
1344#endif
1345
1346	if (cfis[0] != FIS_TYPE_REGH2D) {
1347		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1348		return;
1349	}
1350
1351	if (cfis[1] & 0x80) {
1352		ahci_handle_cmd(p, slot, cfis);
1353	} else {
1354		if (cfis[15] & (1 << 2))
1355			p->reset = 1;
1356		else if (p->reset) {
1357			p->reset = 0;
1358			ahci_port_reset(p);
1359		}
1360		p->ci &= ~(1 << slot);
1361	}
1362}
1363
1364static void
1365ahci_handle_port(struct ahci_port *p)
1366{
1367	int i;
1368
1369	if (!(p->cmd & AHCI_P_CMD_ST))
1370		return;
1371
1372	/*
1373	 * Search for any new commands to issue ignoring those that
1374	 * are already in-flight.
1375	 */
1376	for (i = 0; (i < 32) && p->ci; i++) {
1377		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1378			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1379			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1380			ahci_handle_slot(p, i);
1381		}
1382	}
1383}
1384
1385/*
1386 * blockif callback routine - this runs in the context of the blockif
1387 * i/o thread, so the mutex needs to be acquired.
1388 */
1389static void
1390ata_ioreq_cb(struct blockif_req *br, int err)
1391{
1392	struct ahci_cmd_hdr *hdr;
1393	struct ahci_ioreq *aior;
1394	struct ahci_port *p;
1395	struct pci_ahci_softc *sc;
1396	uint32_t tfd;
1397	uint8_t *cfis;
1398	int pending, slot, ncq;
1399
1400	DPRINTF("%s %d\n", __func__, err);
1401
1402	ncq = 0;
1403	aior = br->br_param;
1404	p = aior->io_pr;
1405	cfis = aior->cfis;
1406	slot = aior->slot;
1407	pending = aior->prdtl;
1408	sc = p->pr_sc;
1409	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1410
1411	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1412			cfis[2] == ATA_READ_FPDMA_QUEUED)
1413		ncq = 1;
1414
1415	pthread_mutex_lock(&sc->mtx);
1416
1417	/*
1418	 * Move the blockif request back to the free list
1419	 */
1420	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1421
1422	if (pending && !err) {
1423		ahci_handle_dma(p, slot, cfis, aior->done,
1424		    hdr->prdtl - pending);
1425		goto out;
1426	}
1427
1428	if (!err && aior->done == aior->len) {
1429		tfd = ATA_S_READY | ATA_S_DSC;
1430		if (ncq)
1431			hdr->prdbc = 0;
1432		else
1433			hdr->prdbc = aior->len;
1434	} else {
1435		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1436		hdr->prdbc = 0;
1437		if (ncq)
1438			p->serr |= (1 << slot);
1439	}
1440
1441	/*
1442	 * This command is now complete.
1443	 */
1444	p->pending &= ~(1 << slot);
1445
1446	if (ncq) {
1447		p->sact &= ~(1 << slot);
1448		ahci_write_fis_sdb(p, slot, tfd);
1449	} else
1450		ahci_write_fis_d2h(p, slot, cfis, tfd);
1451
1452out:
1453	pthread_mutex_unlock(&sc->mtx);
1454	DPRINTF("%s exit\n", __func__);
1455}
1456
1457static void
1458atapi_ioreq_cb(struct blockif_req *br, int err)
1459{
1460	struct ahci_cmd_hdr *hdr;
1461	struct ahci_ioreq *aior;
1462	struct ahci_port *p;
1463	struct pci_ahci_softc *sc;
1464	uint8_t *cfis;
1465	uint32_t tfd;
1466	int pending, slot;
1467
1468	DPRINTF("%s %d\n", __func__, err);
1469
1470	aior = br->br_param;
1471	p = aior->io_pr;
1472	cfis = aior->cfis;
1473	slot = aior->slot;
1474	pending = aior->prdtl;
1475	sc = p->pr_sc;
1476	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1477
1478	pthread_mutex_lock(&sc->mtx);
1479
1480	/*
1481	 * Move the blockif request back to the free list
1482	 */
1483	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1484
1485	if (pending && !err) {
1486		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1487		goto out;
1488	}
1489
1490	if (!err && aior->done == aior->len) {
1491		tfd = ATA_S_READY | ATA_S_DSC;
1492		hdr->prdbc = aior->len;
1493	} else {
1494		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1495		p->asc = 0x21;
1496		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1497		hdr->prdbc = 0;
1498	}
1499
1500	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1501	ahci_write_fis_d2h(p, slot, cfis, tfd);
1502
1503out:
1504	pthread_mutex_unlock(&sc->mtx);
1505	DPRINTF("%s exit\n", __func__);
1506}
1507
1508static void
1509pci_ahci_ioreq_init(struct ahci_port *pr)
1510{
1511	struct ahci_ioreq *vr;
1512	int i;
1513
1514	pr->ioqsz = blockif_queuesz(pr->bctx);
1515	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1516	STAILQ_INIT(&pr->iofhd);
1517
1518	/*
1519	 * Add all i/o request entries to the free queue
1520	 */
1521	for (i = 0; i < pr->ioqsz; i++) {
1522		vr = &pr->ioreq[i];
1523		vr->io_pr = pr;
1524		if (!pr->atapi)
1525			vr->io_req.br_callback = ata_ioreq_cb;
1526		else
1527			vr->io_req.br_callback = atapi_ioreq_cb;
1528		vr->io_req.br_param = vr;
1529		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1530	}
1531}
1532
1533static void
1534pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1535{
1536	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1537	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1538	struct ahci_port *p = &sc->port[port];
1539
1540	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1541		port, offset, value);
1542
1543	switch (offset) {
1544	case AHCI_P_CLB:
1545		p->clb = value;
1546		break;
1547	case AHCI_P_CLBU:
1548		p->clbu = value;
1549		break;
1550	case AHCI_P_FB:
1551		p->fb = value;
1552		break;
1553	case AHCI_P_FBU:
1554		p->fbu = value;
1555		break;
1556	case AHCI_P_IS:
1557		p->is &= ~value;
1558		break;
1559	case AHCI_P_IE:
1560		p->ie = value & 0xFDC000FF;
1561		ahci_generate_intr(sc);
1562		break;
1563	case AHCI_P_CMD:
1564	{
1565		p->cmd = value;
1566
1567		if (!(value & AHCI_P_CMD_ST)) {
1568			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1569			p->ci = 0;
1570			p->sact = 0;
1571		} else {
1572			uint64_t clb;
1573
1574			p->cmd |= AHCI_P_CMD_CR;
1575			clb = (uint64_t)p->clbu << 32 | p->clb;
1576			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1577					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1578		}
1579
1580		if (value & AHCI_P_CMD_FRE) {
1581			uint64_t fb;
1582
1583			p->cmd |= AHCI_P_CMD_FR;
1584			fb = (uint64_t)p->fbu << 32 | p->fb;
1585			/* we don't support FBSCP, so rfis size is 256Bytes */
1586			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1587		} else {
1588			p->cmd &= ~AHCI_P_CMD_FR;
1589		}
1590
1591		if (value & AHCI_P_CMD_CLO) {
1592			p->tfd = 0;
1593			p->cmd &= ~AHCI_P_CMD_CLO;
1594		}
1595
1596		ahci_handle_port(p);
1597		break;
1598	}
1599	case AHCI_P_TFD:
1600	case AHCI_P_SIG:
1601	case AHCI_P_SSTS:
1602		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1603		break;
1604	case AHCI_P_SCTL:
1605		if (!(p->cmd & AHCI_P_CMD_ST)) {
1606			if (value & ATA_SC_DET_RESET)
1607				ahci_port_reset(p);
1608			p->sctl = value;
1609		}
1610		break;
1611	case AHCI_P_SERR:
1612		p->serr &= ~value;
1613		break;
1614	case AHCI_P_SACT:
1615		p->sact |= value;
1616		break;
1617	case AHCI_P_CI:
1618		p->ci |= value;
1619		ahci_handle_port(p);
1620		break;
1621	case AHCI_P_SNTF:
1622	case AHCI_P_FBS:
1623	default:
1624		break;
1625	}
1626}
1627
1628static void
1629pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1630{
1631	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1632		offset, value);
1633
1634	switch (offset) {
1635	case AHCI_CAP:
1636	case AHCI_PI:
1637	case AHCI_VS:
1638	case AHCI_CAP2:
1639		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1640		break;
1641	case AHCI_GHC:
1642		if (value & AHCI_GHC_HR)
1643			ahci_reset(sc);
1644		else if (value & AHCI_GHC_IE) {
1645			sc->ghc |= AHCI_GHC_IE;
1646			ahci_generate_intr(sc);
1647		}
1648		break;
1649	case AHCI_IS:
1650		sc->is &= ~value;
1651		ahci_generate_intr(sc);
1652		break;
1653	default:
1654		break;
1655	}
1656}
1657
1658static void
1659pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1660		int baridx, uint64_t offset, int size, uint64_t value)
1661{
1662	struct pci_ahci_softc *sc = pi->pi_arg;
1663
1664	assert(baridx == 5);
1665	assert(size == 4);
1666
1667	pthread_mutex_lock(&sc->mtx);
1668
1669	if (offset < AHCI_OFFSET)
1670		pci_ahci_host_write(sc, offset, value);
1671	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1672		pci_ahci_port_write(sc, offset, value);
1673	else
1674		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1675
1676	pthread_mutex_unlock(&sc->mtx);
1677}
1678
1679static uint64_t
1680pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1681{
1682	uint32_t value;
1683
1684	switch (offset) {
1685	case AHCI_CAP:
1686	case AHCI_GHC:
1687	case AHCI_IS:
1688	case AHCI_PI:
1689	case AHCI_VS:
1690	case AHCI_CCCC:
1691	case AHCI_CCCP:
1692	case AHCI_EM_LOC:
1693	case AHCI_EM_CTL:
1694	case AHCI_CAP2:
1695	{
1696		uint32_t *p = &sc->cap;
1697		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1698		value = *p;
1699		break;
1700	}
1701	default:
1702		value = 0;
1703		break;
1704	}
1705	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1706		offset, value);
1707
1708	return (value);
1709}
1710
1711static uint64_t
1712pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1713{
1714	uint32_t value;
1715	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1716	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1717
1718	switch (offset) {
1719	case AHCI_P_CLB:
1720	case AHCI_P_CLBU:
1721	case AHCI_P_FB:
1722	case AHCI_P_FBU:
1723	case AHCI_P_IS:
1724	case AHCI_P_IE:
1725	case AHCI_P_CMD:
1726	case AHCI_P_TFD:
1727	case AHCI_P_SIG:
1728	case AHCI_P_SSTS:
1729	case AHCI_P_SCTL:
1730	case AHCI_P_SERR:
1731	case AHCI_P_SACT:
1732	case AHCI_P_CI:
1733	case AHCI_P_SNTF:
1734	case AHCI_P_FBS:
1735	{
1736		uint32_t *p= &sc->port[port].clb;
1737		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1738		value = *p;
1739		break;
1740	}
1741	default:
1742		value = 0;
1743		break;
1744	}
1745
1746	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1747		port, offset, value);
1748
1749	return value;
1750}
1751
1752static uint64_t
1753pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1754    uint64_t offset, int size)
1755{
1756	struct pci_ahci_softc *sc = pi->pi_arg;
1757	uint32_t value;
1758
1759	assert(baridx == 5);
1760	assert(size == 4);
1761
1762	pthread_mutex_lock(&sc->mtx);
1763
1764	if (offset < AHCI_OFFSET)
1765		value = pci_ahci_host_read(sc, offset);
1766	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1767		value = pci_ahci_port_read(sc, offset);
1768	else {
1769		value = 0;
1770		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1771	}
1772
1773	pthread_mutex_unlock(&sc->mtx);
1774
1775	return (value);
1776}
1777
1778static int
1779pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1780{
1781	char bident[sizeof("XX:X:X")];
1782	struct blockif_ctxt *bctxt;
1783	struct pci_ahci_softc *sc;
1784	int ret, slots;
1785
1786	ret = 0;
1787
1788	if (opts == NULL) {
1789		fprintf(stderr, "pci_ahci: backing device required\n");
1790		return (1);
1791	}
1792
1793#ifdef AHCI_DEBUG
1794	dbg = fopen("/tmp/log", "w+");
1795#endif
1796
1797	sc = calloc(1, sizeof(struct pci_ahci_softc));
1798	pi->pi_arg = sc;
1799	sc->asc_pi = pi;
1800	sc->ports = MAX_PORTS;
1801
1802	/*
1803	 * Only use port 0 for a backing device. All other ports will be
1804	 * marked as unused
1805	 */
1806	sc->port[0].atapi = atapi;
1807
1808	/*
1809	 * Attempt to open the backing image. Use the PCI
1810	 * slot/func for the identifier string.
1811	 */
1812	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1813	bctxt = blockif_open(opts, bident);
1814	if (bctxt == NULL) {
1815		ret = 1;
1816		goto open_fail;
1817	}
1818	sc->port[0].bctx = bctxt;
1819	sc->port[0].pr_sc = sc;
1820
1821	/*
1822	 * Allocate blockif request structures and add them
1823	 * to the free list
1824	 */
1825	pci_ahci_ioreq_init(&sc->port[0]);
1826
1827	pthread_mutex_init(&sc->mtx, NULL);
1828
1829	/* Intel ICH8 AHCI */
1830	slots = sc->port[0].ioqsz;
1831	if (slots > 32)
1832		slots = 32;
1833	--slots;
1834	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1835	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1836	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1837	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1838	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1839
1840	/* Only port 0 implemented */
1841	sc->pi = 1;
1842	sc->vs = 0x10300;
1843	sc->cap2 = AHCI_CAP2_APST;
1844	ahci_reset(sc);
1845
1846	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1847	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1848	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1849	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1850	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1851	pci_emul_add_msicap(pi, 1);
1852	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1853	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1854
1855	pci_lintr_request(pi);
1856
1857open_fail:
1858	if (ret) {
1859		blockif_close(sc->port[0].bctx);
1860		free(sc);
1861	}
1862
1863	return (ret);
1864}
1865
/*
 * Init hook for the "ahci-hd" emulation: common AHCI setup with the
 * atapi flag clear.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1872
/*
 * Init hook for the "ahci-cd" emulation: common AHCI setup with the
 * atapi flag set.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1879
1880/*
1881 * Use separate emulation names to distinguish drive and atapi devices
1882 */
1883struct pci_devemu pci_de_ahci_hd = {
1884	.pe_emu =	"ahci-hd",
1885	.pe_init =	pci_ahci_hd_init,
1886	.pe_barwrite =	pci_ahci_write,
1887	.pe_barread =	pci_ahci_read
1888};
1889PCI_EMUL_SET(pci_de_ahci_hd);
1890
1891struct pci_devemu pci_de_ahci_cd = {
1892	.pe_emu =	"ahci-cd",
1893	.pe_init =	pci_ahci_atapi_init,
1894	.pe_barwrite =	pci_ahci_write,
1895	.pe_barread =	pci_ahci_read
1896};
1897PCI_EMUL_SET(pci_de_ahci_cd);
1898