pci_ahci.c revision 267339
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 267339 2014-06-10 19:00:14Z jhb $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 267339 2014-06-10 19:00:14Z jhb $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <inttypes.h>
52
53#include "bhyverun.h"
54#include "pci_emul.h"
55#include "ahci.h"
56#include "block_if.h"
57
/* Size of the per-controller port array (Intel ICH8 AHCI supports 6 ports). */
#define	MAX_PORTS	6

/* Values ahci_port_reset() stores in a port's signature field. */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
62
/*
 * FIS type codes.  The code is the first byte of a FIS: the emulation
 * checks byte 0 of each command FIS against FIS_TYPE_REGH2D and writes the
 * matching code into byte 0 of the register FISes it builds.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS, host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS, device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS, device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS, bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS, bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS, bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5f,	/* PIO setup FIS, device to host */
	FIS_TYPE_SETDEVBITS	= 0xa1,	/* Set device bits FIS, device to host */
};
73
/*
 * SCSI operation codes: byte 0 of the ATAPI command packet, dispatched on
 * by handle_packet_cmd().
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes understood by atapi_mode_sense().
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA SET FEATURES sub-codes: ahci_handle_cmd() compares the
 * ATA_SF_*_SATA_SF values with command FIS byte 3 and ATA_SATA_SF_AN with
 * byte 12.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
103
/*
 * Diagnostic output.  With AHCI_DEBUG defined, DPRINTF() prints to the
 * `dbg' stream and flushes it after every message; otherwise it expands to
 * nothing.  WPRINTF() always prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(format, arg...)	do {					\
	fprintf(dbg, format, ##arg);					\
	fflush(dbg);							\
} while (0)
#else
#define	DPRINTF(format, arg...)
#endif
#define	WPRINTF(format, arg...)	printf(format, ##arg)
114
115struct ahci_ioreq {
116	struct blockif_req io_req;
117	struct ahci_port *io_pr;
118	STAILQ_ENTRY(ahci_ioreq) io_list;
119	uint8_t *cfis;
120	uint32_t len;
121	uint32_t done;
122	int slot;
123	int prdtl;
124};
125
/*
 * State of one emulated port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL: no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* command list, AHCI_CL_SIZE per slot */
	uint8_t *rfis;			/* area that received FISes are copied to */
	int atapi;			/* non-zero: device is ATAPI */
	int reset;			/* software reset requested by the guest */
	int mult_sectors;		/* value set by ATA_SET_MULTI */
	uint8_t xfermode;		/* current transfer mode */
	uint8_t sense_key;		/* ATAPI sense key for REQUEST SENSE */
	uint8_t asc;			/* ATAPI additional sense code */
	uint32_t pending;		/* bitmask of slots with I/O in flight */

	/*
	 * Port register values.
	 * NOTE(review): `unused0' looks like padding that keeps these fields
	 * laid out like the port's register block, so they are presumably
	 * accessed by offset elsewhere; do not reorder without confirming.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * I/O request bookkeeping: `iofhd' is the list the command handlers
	 * pull a request from before starting block I/O.
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
};
164
/*
 * Command header: one entry of a port's command list, overlaid on
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* low 5 bits: command FIS length in dwords */
	uint16_t prdtl;		/* number of PRDT entries in the command table */
	uint32_t prdbc;		/* byte count, written back by the emulation */
	uint64_t ctba;		/* guest physical address of the command table */
	uint32_t reserved[4];
};
172
/*
 * PRDT entry: one guest memory segment of a command's data buffer.  The
 * table starts 0x80 bytes into the command table.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the segment */
	uint32_t reserved;
#define	DBCMASK		0x3fffff /* byte-count bits of dbc */
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the segment size */
};
179
180struct pci_ahci_softc {
181	struct pci_devinst *asc_pi;
182	pthread_mutex_t	mtx;
183	int ports;
184	uint32_t cap;
185	uint32_t ghc;
186	uint32_t is;
187	uint32_t pi;
188	uint32_t vs;
189	uint32_t ccc_ctl;
190	uint32_t ccc_pts;
191	uint32_t em_loc;
192	uint32_t em_ctl;
193	uint32_t cap2;
194	uint32_t bohc;
195	struct ahci_port port[MAX_PORTS];
196};
197#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
198
/*
 * Convert a logical block address to a minute/second/frame address and
 * store it as buf[0] = minute, buf[1] = second, buf[2] = frame.  The
 * address is offset by 150 frames first; there are 75 frames per second.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
206
207/*
208 * generate HBA intr depending on whether or not ports within
209 * the controller have an interrupt pending.
210 */
211static void
212ahci_generate_intr(struct pci_ahci_softc *sc)
213{
214	int i;
215
216	for (i = 0; i < sc->ports; i++) {
217		struct ahci_port *pr;
218		pr = &sc->port[i];
219		if (pr->is & pr->ie)
220			sc->is |= (1 << i);
221	}
222
223	DPRINTF("%s %x\n", __func__, sc->is);
224
225	if (sc->is && (sc->ghc & AHCI_GHC_IE))
226		pci_generate_msi(sc->asc_pi, 0);
227}
228
229static void
230ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
231{
232	int offset, len, irq;
233
234	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
235		return;
236
237	switch (ft) {
238	case FIS_TYPE_REGD2H:
239		offset = 0x40;
240		len = 20;
241		irq = AHCI_P_IX_DHR;
242		break;
243	case FIS_TYPE_SETDEVBITS:
244		offset = 0x58;
245		len = 8;
246		irq = AHCI_P_IX_SDB;
247		break;
248	case FIS_TYPE_PIOSETUP:
249		offset = 0x20;
250		len = 20;
251		irq = 0;
252		break;
253	default:
254		WPRINTF("unsupported fis type %d\n", ft);
255		return;
256	}
257	memcpy(p->rfis + offset, fis, len);
258	if (irq) {
259		p->is |= irq;
260		ahci_generate_intr(p->pr_sc);
261	}
262}
263
264static void
265ahci_write_fis_piosetup(struct ahci_port *p)
266{
267	uint8_t fis[20];
268
269	memset(fis, 0, sizeof(fis));
270	fis[0] = FIS_TYPE_PIOSETUP;
271	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
272}
273
274static void
275ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
276{
277	uint8_t fis[8];
278	uint8_t error;
279
280	error = (tfd >> 8) & 0xff;
281	memset(fis, 0, sizeof(fis));
282	fis[0] = error;
283	fis[2] = tfd & 0x77;
284	*(uint32_t *)(fis + 4) = (1 << slot);
285	if (fis[2] & ATA_S_ERROR)
286		p->is |= AHCI_P_IX_TFE;
287	p->tfd = tfd;
288	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
289}
290
291static void
292ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
293{
294	uint8_t fis[20];
295	uint8_t error;
296
297	error = (tfd >> 8) & 0xff;
298	memset(fis, 0, sizeof(fis));
299	fis[0] = FIS_TYPE_REGD2H;
300	fis[1] = (1 << 6);
301	fis[2] = tfd & 0xff;
302	fis[3] = error;
303	fis[4] = cfis[4];
304	fis[5] = cfis[5];
305	fis[6] = cfis[6];
306	fis[7] = cfis[7];
307	fis[8] = cfis[8];
308	fis[9] = cfis[9];
309	fis[10] = cfis[10];
310	fis[11] = cfis[11];
311	fis[12] = cfis[12];
312	fis[13] = cfis[13];
313	if (fis[2] & ATA_S_ERROR)
314		p->is |= AHCI_P_IX_TFE;
315	p->tfd = tfd;
316	p->ci &= ~(1 << slot);
317	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
318}
319
320static void
321ahci_write_reset_fis_d2h(struct ahci_port *p)
322{
323	uint8_t fis[20];
324
325	memset(fis, 0, sizeof(fis));
326	fis[0] = FIS_TYPE_REGD2H;
327	fis[3] = 1;
328	fis[4] = 1;
329	if (p->atapi) {
330		fis[5] = 0x14;
331		fis[6] = 0xeb;
332	}
333	fis[12] = 1;
334	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
335}
336
337static void
338ahci_port_reset(struct ahci_port *pr)
339{
340	pr->sctl = 0;
341	pr->serr = 0;
342	pr->sact = 0;
343	pr->xfermode = ATA_UDMA6;
344	pr->mult_sectors = 128;
345
346	if (!pr->bctx) {
347		pr->ssts = ATA_SS_DET_NO_DEVICE;
348		pr->sig = 0xFFFFFFFF;
349		pr->tfd = 0x7F;
350		return;
351	}
352	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
353		ATA_SS_IPM_ACTIVE;
354	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
355	if (!pr->atapi) {
356		pr->sig = PxSIG_ATA;
357		pr->tfd |= ATA_S_READY;
358	} else
359		pr->sig = PxSIG_ATAPI;
360	ahci_write_reset_fis_d2h(pr);
361}
362
363static void
364ahci_reset(struct pci_ahci_softc *sc)
365{
366	int i;
367
368	sc->ghc = AHCI_GHC_AE;
369	sc->is = 0;
370	for (i = 0; i < sc->ports; i++) {
371		sc->port[i].ie = 0;
372		sc->port[i].is = 0;
373		ahci_port_reset(&sc->port[i]);
374	}
375}
376
/*
 * Store `src' into the `len'-byte field `dest' in ATA string format: the
 * two bytes of every 16-bit word are swapped (index ^ 1) and the field is
 * padded with spaces once `src' is exhausted.  A longer `src' is truncated.
 *
 * `len' must be even; with an odd length the final store lands one byte
 * past the field.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	char ch;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
389
/*
 * Store `src' into the `len'-byte field `dest' without byte swapping,
 * padding with spaces once `src' is exhausted.  A longer `src' is
 * truncated; no terminating NUL is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
402
403static void
404ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
405    int seek)
406{
407	struct ahci_ioreq *aior;
408	struct blockif_req *breq;
409	struct pci_ahci_softc *sc;
410	struct ahci_prdt_entry *prdt;
411	struct ahci_cmd_hdr *hdr;
412	uint64_t lba;
413	uint32_t len;
414	int i, err, iovcnt, ncq, readop;
415
416	sc = p->pr_sc;
417	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
418	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
419	ncq = 0;
420	readop = 1;
421
422	prdt += seek;
423	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
424			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
425		readop = 0;
426
427	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
428			cfis[2] == ATA_READ_FPDMA_QUEUED) {
429		lba = ((uint64_t)cfis[10] << 40) |
430			((uint64_t)cfis[9] << 32) |
431			((uint64_t)cfis[8] << 24) |
432			((uint64_t)cfis[6] << 16) |
433			((uint64_t)cfis[5] << 8) |
434			cfis[4];
435		len = cfis[11] << 8 | cfis[3];
436		if (!len)
437			len = 65536;
438		ncq = 1;
439	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
440		lba = ((uint64_t)cfis[10] << 40) |
441			((uint64_t)cfis[9] << 32) |
442			((uint64_t)cfis[8] << 24) |
443			((uint64_t)cfis[6] << 16) |
444			((uint64_t)cfis[5] << 8) |
445			cfis[4];
446		len = cfis[13] << 8 | cfis[12];
447		if (!len)
448			len = 65536;
449	} else {
450		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
451			(cfis[5] << 8) | cfis[4];
452		len = cfis[12];
453		if (!len)
454			len = 256;
455	}
456	lba *= blockif_sectsz(p->bctx);
457	len *= blockif_sectsz(p->bctx);
458
459	/*
460	 * Pull request off free list
461	 */
462	aior = STAILQ_FIRST(&p->iofhd);
463	assert(aior != NULL);
464	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
465	aior->cfis = cfis;
466	aior->slot = slot;
467	aior->len = len;
468	aior->done = done;
469	breq = &aior->io_req;
470	breq->br_offset = lba + done;
471	iovcnt = hdr->prdtl - seek;
472	if (iovcnt > BLOCKIF_IOV_MAX) {
473		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
474		iovcnt = BLOCKIF_IOV_MAX;
475		/*
476		 * Mark this command in-flight.
477		 */
478		p->pending |= 1 << slot;
479	} else
480		aior->prdtl = 0;
481	breq->br_iovcnt = iovcnt;
482
483	/*
484	 * Build up the iovec based on the prdt
485	 */
486	for (i = 0; i < iovcnt; i++) {
487		uint32_t dbcsz;
488
489		dbcsz = (prdt->dbc & DBCMASK) + 1;
490		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
491		    prdt->dba, dbcsz);
492		breq->br_iov[i].iov_len = dbcsz;
493		aior->done += dbcsz;
494		prdt++;
495	}
496	if (readop)
497		err = blockif_read(p->bctx, breq);
498	else
499		err = blockif_write(p->bctx, breq);
500	assert(err == 0);
501
502	if (ncq)
503		p->ci &= ~(1 << slot);
504}
505
506static void
507ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
508{
509	struct ahci_ioreq *aior;
510	struct blockif_req *breq;
511	int err;
512
513	/*
514	 * Pull request off free list
515	 */
516	aior = STAILQ_FIRST(&p->iofhd);
517	assert(aior != NULL);
518	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
519	aior->cfis = cfis;
520	aior->slot = slot;
521	aior->len = 0;
522	aior->done = 0;
523	aior->prdtl = 0;
524	breq = &aior->io_req;
525
526	err = blockif_flush(p->bctx, breq);
527	assert(err == 0);
528}
529
530static inline void
531write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
532		void *buf, int size)
533{
534	struct ahci_cmd_hdr *hdr;
535	struct ahci_prdt_entry *prdt;
536	void *from;
537	int i, len;
538
539	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
540	len = size;
541	from = buf;
542	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
543	for (i = 0; i < hdr->prdtl && len; i++) {
544		uint8_t *ptr;
545		uint32_t dbcsz;
546		int sublen;
547
548		dbcsz = (prdt->dbc & DBCMASK) + 1;
549		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
550		sublen = len < dbcsz ? len : dbcsz;
551		memcpy(ptr, from, sublen);
552		len -= sublen;
553		from += sublen;
554		prdt++;
555	}
556	hdr->prdbc = size - len;
557}
558
559static void
560handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
561{
562	struct ahci_cmd_hdr *hdr;
563
564	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
565	if (p->atapi || hdr->prdtl == 0) {
566		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
567		p->is |= AHCI_P_IX_TFE;
568	} else {
569		uint16_t buf[256];
570		uint64_t sectors;
571
572		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
573		memset(buf, 0, sizeof(buf));
574		buf[0] = 0x0040;
575		/* TODO emulate different serial? */
576		ata_string((uint8_t *)(buf+10), "123456", 20);
577		ata_string((uint8_t *)(buf+23), "001", 8);
578		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
579		buf[47] = (0x8000 | 128);
580		buf[48] = 0x1;
581		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
582		buf[50] = (1 << 14);
583		buf[53] = (1 << 1 | 1 << 2);
584		if (p->mult_sectors)
585			buf[59] = (0x100 | p->mult_sectors);
586		buf[60] = sectors;
587		buf[61] = (sectors >> 16);
588		buf[63] = 0x7;
589		if (p->xfermode & ATA_WDMA0)
590			buf[63] |= (1 << ((p->xfermode & 7) + 8));
591		buf[64] = 0x3;
592		buf[65] = 100;
593		buf[66] = 100;
594		buf[67] = 100;
595		buf[68] = 100;
596		buf[75] = 31;
597		buf[76] = (1 << 8 | 1 << 2);
598		buf[80] = 0x1f0;
599		buf[81] = 0x28;
600		buf[82] = (1 << 5 | 1 << 14);
601		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
602		buf[84] = (1 << 14);
603		buf[85] = (1 << 5 | 1 << 14);
604		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
605		buf[87] = (1 << 14);
606		buf[88] = 0x7f;
607		if (p->xfermode & ATA_UDMA0)
608			buf[88] |= (1 << ((p->xfermode & 7) + 8));
609		buf[93] = (1 | 1 <<14);
610		buf[100] = sectors;
611		buf[101] = (sectors >> 16);
612		buf[102] = (sectors >> 32);
613		buf[103] = (sectors >> 48);
614		ahci_write_fis_piosetup(p);
615		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
616		p->tfd = ATA_S_DSC | ATA_S_READY;
617		p->is |= AHCI_P_IX_DP;
618	}
619	p->ci &= ~(1 << slot);
620	ahci_generate_intr(p->pr_sc);
621}
622
623static void
624handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
625{
626	if (!p->atapi) {
627		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
628		p->is |= AHCI_P_IX_TFE;
629	} else {
630		uint16_t buf[256];
631
632		memset(buf, 0, sizeof(buf));
633		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
634		/* TODO emulate different serial? */
635		ata_string((uint8_t *)(buf+10), "123456", 20);
636		ata_string((uint8_t *)(buf+23), "001", 8);
637		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
638		buf[49] = (1 << 9 | 1 << 8);
639		buf[50] = (1 << 14 | 1);
640		buf[53] = (1 << 2 | 1 << 1);
641		buf[62] = 0x3f;
642		buf[63] = 7;
643		buf[64] = 3;
644		buf[65] = 100;
645		buf[66] = 100;
646		buf[67] = 100;
647		buf[68] = 100;
648		buf[76] = (1 << 2 | 1 << 1);
649		buf[78] = (1 << 5);
650		buf[80] = (0x1f << 4);
651		buf[82] = (1 << 4);
652		buf[83] = (1 << 14);
653		buf[84] = (1 << 14);
654		buf[85] = (1 << 4);
655		buf[87] = (1 << 14);
656		buf[88] = (1 << 14 | 0x7f);
657		ahci_write_fis_piosetup(p);
658		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
659		p->tfd = ATA_S_DSC | ATA_S_READY;
660		p->is |= AHCI_P_IX_DHR;
661	}
662	p->ci &= ~(1 << slot);
663	ahci_generate_intr(p->pr_sc);
664}
665
666static void
667atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
668{
669	uint8_t buf[36];
670	uint8_t *acmd;
671	int len;
672
673	acmd = cfis + 0x40;
674
675	buf[0] = 0x05;
676	buf[1] = 0x80;
677	buf[2] = 0x00;
678	buf[3] = 0x21;
679	buf[4] = 31;
680	buf[5] = 0;
681	buf[6] = 0;
682	buf[7] = 0;
683	atapi_string(buf + 8, "BHYVE", 8);
684	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
685	atapi_string(buf + 32, "001", 4);
686
687	len = sizeof(buf);
688	if (len > acmd[4])
689		len = acmd[4];
690	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
691	write_prdt(p, slot, cfis, buf, len);
692	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
693}
694
695static void
696atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
697{
698	uint8_t buf[8];
699	uint64_t sectors;
700
701	sectors = blockif_size(p->bctx) / 2048;
702	be32enc(buf, sectors - 1);
703	be32enc(buf + 4, 2048);
704	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
705	write_prdt(p, slot, cfis, buf, sizeof(buf));
706	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
707}
708
709static void
710atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
711{
712	uint8_t *acmd;
713	uint8_t format;
714	int len;
715
716	acmd = cfis + 0x40;
717
718	len = be16dec(acmd + 7);
719	format = acmd[9] >> 6;
720	switch (format) {
721	case 0:
722	{
723		int msf, size;
724		uint64_t sectors;
725		uint8_t start_track, buf[20], *bp;
726
727		msf = (acmd[1] >> 1) & 1;
728		start_track = acmd[6];
729		if (start_track > 1 && start_track != 0xaa) {
730			uint32_t tfd;
731			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
732			p->asc = 0x24;
733			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
734			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
735			ahci_write_fis_d2h(p, slot, cfis, tfd);
736			return;
737		}
738		bp = buf + 2;
739		*bp++ = 1;
740		*bp++ = 1;
741		if (start_track <= 1) {
742			*bp++ = 0;
743			*bp++ = 0x14;
744			*bp++ = 1;
745			*bp++ = 0;
746			if (msf) {
747				*bp++ = 0;
748				lba_to_msf(bp, 0);
749				bp += 3;
750			} else {
751				*bp++ = 0;
752				*bp++ = 0;
753				*bp++ = 0;
754				*bp++ = 0;
755			}
756		}
757		*bp++ = 0;
758		*bp++ = 0x14;
759		*bp++ = 0xaa;
760		*bp++ = 0;
761		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
762		sectors >>= 2;
763		if (msf) {
764			*bp++ = 0;
765			lba_to_msf(bp, sectors);
766			bp += 3;
767		} else {
768			be32enc(bp, sectors);
769			bp += 4;
770		}
771		size = bp - buf;
772		be16enc(buf, size - 2);
773		if (len > size)
774			len = size;
775		write_prdt(p, slot, cfis, buf, len);
776		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
777		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
778		break;
779	}
780	case 1:
781	{
782		uint8_t buf[12];
783
784		memset(buf, 0, sizeof(buf));
785		buf[1] = 0xa;
786		buf[2] = 0x1;
787		buf[3] = 0x1;
788		if (len > sizeof(buf))
789			len = sizeof(buf);
790		write_prdt(p, slot, cfis, buf, len);
791		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
792		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
793		break;
794	}
795	case 2:
796	{
797		int msf, size;
798		uint64_t sectors;
799		uint8_t start_track, *bp, buf[50];
800
801		msf = (acmd[1] >> 1) & 1;
802		start_track = acmd[6];
803		bp = buf + 2;
804		*bp++ = 1;
805		*bp++ = 1;
806
807		*bp++ = 1;
808		*bp++ = 0x14;
809		*bp++ = 0;
810		*bp++ = 0xa0;
811		*bp++ = 0;
812		*bp++ = 0;
813		*bp++ = 0;
814		*bp++ = 0;
815		*bp++ = 1;
816		*bp++ = 0;
817		*bp++ = 0;
818
819		*bp++ = 1;
820		*bp++ = 0x14;
821		*bp++ = 0;
822		*bp++ = 0xa1;
823		*bp++ = 0;
824		*bp++ = 0;
825		*bp++ = 0;
826		*bp++ = 0;
827		*bp++ = 1;
828		*bp++ = 0;
829		*bp++ = 0;
830
831		*bp++ = 1;
832		*bp++ = 0x14;
833		*bp++ = 0;
834		*bp++ = 0xa2;
835		*bp++ = 0;
836		*bp++ = 0;
837		*bp++ = 0;
838		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
839		sectors >>= 2;
840		if (msf) {
841			*bp++ = 0;
842			lba_to_msf(bp, sectors);
843			bp += 3;
844		} else {
845			be32enc(bp, sectors);
846			bp += 4;
847		}
848
849		*bp++ = 1;
850		*bp++ = 0x14;
851		*bp++ = 0;
852		*bp++ = 1;
853		*bp++ = 0;
854		*bp++ = 0;
855		*bp++ = 0;
856		if (msf) {
857			*bp++ = 0;
858			lba_to_msf(bp, 0);
859			bp += 3;
860		} else {
861			*bp++ = 0;
862			*bp++ = 0;
863			*bp++ = 0;
864			*bp++ = 0;
865		}
866
867		size = bp - buf;
868		be16enc(buf, size - 2);
869		if (len > size)
870			len = size;
871		write_prdt(p, slot, cfis, buf, len);
872		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
873		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
874		break;
875	}
876	default:
877	{
878		uint32_t tfd;
879
880		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
881		p->asc = 0x24;
882		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
883		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
884		ahci_write_fis_d2h(p, slot, cfis, tfd);
885		break;
886	}
887	}
888}
889
890static void
891atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
892		uint32_t done, int seek)
893{
894	struct ahci_ioreq *aior;
895	struct ahci_cmd_hdr *hdr;
896	struct ahci_prdt_entry *prdt;
897	struct blockif_req *breq;
898	struct pci_ahci_softc *sc;
899	uint8_t *acmd;
900	uint64_t lba;
901	uint32_t len;
902	int i, err, iovcnt;
903
904	sc = p->pr_sc;
905	acmd = cfis + 0x40;
906	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
907	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
908
909	prdt += seek;
910	lba = be32dec(acmd + 2);
911	if (acmd[0] == READ_10)
912		len = be16dec(acmd + 7);
913	else
914		len = be32dec(acmd + 6);
915	if (len == 0) {
916		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
917		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
918	}
919	lba *= 2048;
920	len *= 2048;
921
922	/*
923	 * Pull request off free list
924	 */
925	aior = STAILQ_FIRST(&p->iofhd);
926	assert(aior != NULL);
927	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
928	aior->cfis = cfis;
929	aior->slot = slot;
930	aior->len = len;
931	aior->done = done;
932	breq = &aior->io_req;
933	breq->br_offset = lba + done;
934	iovcnt = hdr->prdtl - seek;
935	if (iovcnt > BLOCKIF_IOV_MAX) {
936		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
937		iovcnt = BLOCKIF_IOV_MAX;
938	} else
939		aior->prdtl = 0;
940	breq->br_iovcnt = iovcnt;
941
942	/*
943	 * Build up the iovec based on the prdt
944	 */
945	for (i = 0; i < iovcnt; i++) {
946		uint32_t dbcsz;
947
948		dbcsz = (prdt->dbc & DBCMASK) + 1;
949		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
950		    prdt->dba, dbcsz);
951		breq->br_iov[i].iov_len = dbcsz;
952		aior->done += dbcsz;
953		prdt++;
954	}
955	err = blockif_read(p->bctx, breq);
956	assert(err == 0);
957}
958
959static void
960atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
961{
962	uint8_t buf[64];
963	uint8_t *acmd;
964	int len;
965
966	acmd = cfis + 0x40;
967	len = acmd[4];
968	if (len > sizeof(buf))
969		len = sizeof(buf);
970	memset(buf, 0, len);
971	buf[0] = 0x70 | (1 << 7);
972	buf[2] = p->sense_key;
973	buf[7] = 10;
974	buf[12] = p->asc;
975	write_prdt(p, slot, cfis, buf, len);
976	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
977	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
978}
979
980static void
981atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
982{
983	uint8_t *acmd = cfis + 0x40;
984	uint32_t tfd;
985
986	switch (acmd[4] & 3) {
987	case 0:
988	case 1:
989	case 3:
990		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
991		tfd = ATA_S_READY | ATA_S_DSC;
992		break;
993	case 2:
994		/* TODO eject media */
995		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
996		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
997		p->asc = 0x53;
998		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
999		break;
1000	}
1001	ahci_write_fis_d2h(p, slot, cfis, tfd);
1002}
1003
1004static void
1005atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1006{
1007	uint8_t *acmd;
1008	uint32_t tfd;
1009	uint8_t pc, code;
1010	int len;
1011
1012	acmd = cfis + 0x40;
1013	len = be16dec(acmd + 7);
1014	pc = acmd[2] >> 6;
1015	code = acmd[2] & 0x3f;
1016
1017	switch (pc) {
1018	case 0:
1019		switch (code) {
1020		case MODEPAGE_RW_ERROR_RECOVERY:
1021		{
1022			uint8_t buf[16];
1023
1024			if (len > sizeof(buf))
1025				len = sizeof(buf);
1026
1027			memset(buf, 0, sizeof(buf));
1028			be16enc(buf, 16 - 2);
1029			buf[2] = 0x70;
1030			buf[8] = 0x01;
1031			buf[9] = 16 - 10;
1032			buf[11] = 0x05;
1033			write_prdt(p, slot, cfis, buf, len);
1034			tfd = ATA_S_READY | ATA_S_DSC;
1035			break;
1036		}
1037		case MODEPAGE_CD_CAPABILITIES:
1038		{
1039			uint8_t buf[30];
1040
1041			if (len > sizeof(buf))
1042				len = sizeof(buf);
1043
1044			memset(buf, 0, sizeof(buf));
1045			be16enc(buf, 30 - 2);
1046			buf[2] = 0x70;
1047			buf[8] = 0x2A;
1048			buf[9] = 30 - 10;
1049			buf[10] = 0x08;
1050			buf[12] = 0x71;
1051			be16enc(&buf[18], 2);
1052			be16enc(&buf[20], 512);
1053			write_prdt(p, slot, cfis, buf, len);
1054			tfd = ATA_S_READY | ATA_S_DSC;
1055			break;
1056		}
1057		default:
1058			goto error;
1059			break;
1060		}
1061		break;
1062	case 3:
1063		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1064		p->asc = 0x39;
1065		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1066		break;
1067error:
1068	case 1:
1069	case 2:
1070		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1071		p->asc = 0x24;
1072		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1073		break;
1074	}
1075	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1076	ahci_write_fis_d2h(p, slot, cfis, tfd);
1077}
1078
1079static void
1080atapi_get_event_status_notification(struct ahci_port *p, int slot,
1081    uint8_t *cfis)
1082{
1083	uint8_t *acmd;
1084	uint32_t tfd;
1085
1086	acmd = cfis + 0x40;
1087
1088	/* we don't support asynchronous operation */
1089	if (!(acmd[1] & 1)) {
1090		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1091		p->asc = 0x24;
1092		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1093	} else {
1094		uint8_t buf[8];
1095		int len;
1096
1097		len = be16dec(acmd + 7);
1098		if (len > sizeof(buf))
1099			len = sizeof(buf);
1100
1101		memset(buf, 0, sizeof(buf));
1102		be16enc(buf, 8 - 2);
1103		buf[2] = 0x04;
1104		buf[3] = 0x10;
1105		buf[5] = 0x02;
1106		write_prdt(p, slot, cfis, buf, len);
1107		tfd = ATA_S_READY | ATA_S_DSC;
1108	}
1109	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1110	ahci_write_fis_d2h(p, slot, cfis, tfd);
1111}
1112
1113static void
1114handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1115{
1116	uint8_t *acmd;
1117
1118	acmd = cfis + 0x40;
1119
1120#ifdef AHCI_DEBUG
1121	{
1122		int i;
1123		DPRINTF("ACMD:");
1124		for (i = 0; i < 16; i++)
1125			DPRINTF("%02x ", acmd[i]);
1126		DPRINTF("\n");
1127	}
1128#endif
1129
1130	switch (acmd[0]) {
1131	case TEST_UNIT_READY:
1132		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1133		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1134		break;
1135	case INQUIRY:
1136		atapi_inquiry(p, slot, cfis);
1137		break;
1138	case READ_CAPACITY:
1139		atapi_read_capacity(p, slot, cfis);
1140		break;
1141	case PREVENT_ALLOW:
1142		/* TODO */
1143		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1144		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1145		break;
1146	case READ_TOC:
1147		atapi_read_toc(p, slot, cfis);
1148		break;
1149	case READ_10:
1150	case READ_12:
1151		atapi_read(p, slot, cfis, 0, 0);
1152		break;
1153	case REQUEST_SENSE:
1154		atapi_request_sense(p, slot, cfis);
1155		break;
1156	case START_STOP_UNIT:
1157		atapi_start_stop_unit(p, slot, cfis);
1158		break;
1159	case MODE_SENSE_10:
1160		atapi_mode_sense(p, slot, cfis);
1161		break;
1162	case GET_EVENT_STATUS_NOTIFICATION:
1163		atapi_get_event_status_notification(p, slot, cfis);
1164		break;
1165	default:
1166		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1167		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1168		p->asc = 0x20;
1169		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1170				ATA_S_READY | ATA_S_ERROR);
1171		break;
1172	}
1173}
1174
1175static void
1176ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1177{
1178
1179	switch (cfis[2]) {
1180	case ATA_ATA_IDENTIFY:
1181		handle_identify(p, slot, cfis);
1182		break;
1183	case ATA_SETFEATURES:
1184	{
1185		switch (cfis[3]) {
1186		case ATA_SF_ENAB_SATA_SF:
1187			switch (cfis[12]) {
1188			case ATA_SATA_SF_AN:
1189				p->tfd = ATA_S_DSC | ATA_S_READY;
1190				break;
1191			default:
1192				p->tfd = ATA_S_ERROR | ATA_S_READY;
1193				p->tfd |= (ATA_ERROR_ABORT << 8);
1194				break;
1195			}
1196			break;
1197		case ATA_SF_ENAB_WCACHE:
1198		case ATA_SF_DIS_WCACHE:
1199		case ATA_SF_ENAB_RCACHE:
1200		case ATA_SF_DIS_RCACHE:
1201			p->tfd = ATA_S_DSC | ATA_S_READY;
1202			break;
1203		case ATA_SF_SETXFER:
1204		{
1205			switch (cfis[12] & 0xf8) {
1206			case ATA_PIO:
1207			case ATA_PIO0:
1208				break;
1209			case ATA_WDMA0:
1210			case ATA_UDMA0:
1211				p->xfermode = (cfis[12] & 0x7);
1212				break;
1213			}
1214			p->tfd = ATA_S_DSC | ATA_S_READY;
1215			break;
1216		}
1217		default:
1218			p->tfd = ATA_S_ERROR | ATA_S_READY;
1219			p->tfd |= (ATA_ERROR_ABORT << 8);
1220			break;
1221		}
1222		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1223		break;
1224	}
1225	case ATA_SET_MULTI:
1226		if (cfis[12] != 0 &&
1227			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1228			p->tfd = ATA_S_ERROR | ATA_S_READY;
1229			p->tfd |= (ATA_ERROR_ABORT << 8);
1230		} else {
1231			p->mult_sectors = cfis[12];
1232			p->tfd = ATA_S_DSC | ATA_S_READY;
1233		}
1234		p->is |= AHCI_P_IX_DP;
1235		p->ci &= ~(1 << slot);
1236		ahci_generate_intr(p->pr_sc);
1237		break;
1238	case ATA_READ_DMA:
1239	case ATA_WRITE_DMA:
1240	case ATA_READ_DMA48:
1241	case ATA_WRITE_DMA48:
1242	case ATA_READ_FPDMA_QUEUED:
1243	case ATA_WRITE_FPDMA_QUEUED:
1244		ahci_handle_dma(p, slot, cfis, 0, 0);
1245		break;
1246	case ATA_FLUSHCACHE:
1247	case ATA_FLUSHCACHE48:
1248		ahci_handle_flush(p, slot, cfis);
1249		break;
1250	case ATA_STANDBY_CMD:
1251		break;
1252	case ATA_NOP:
1253	case ATA_STANDBY_IMMEDIATE:
1254	case ATA_IDLE_IMMEDIATE:
1255	case ATA_SLEEP:
1256		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1257		break;
1258	case ATA_ATAPI_IDENTIFY:
1259		handle_atapi_identify(p, slot, cfis);
1260		break;
1261	case ATA_PACKET_CMD:
1262		if (!p->atapi) {
1263			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1264			p->is |= AHCI_P_IX_TFE;
1265			p->ci &= ~(1 << slot);
1266			ahci_generate_intr(p->pr_sc);
1267		} else
1268			handle_packet_cmd(p, slot, cfis);
1269		break;
1270	default:
1271		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1272		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1273		p->is |= AHCI_P_IX_TFE;
1274		p->ci &= ~(1 << slot);
1275		ahci_generate_intr(p->pr_sc);
1276		break;
1277	}
1278}
1279
1280static void
1281ahci_handle_slot(struct ahci_port *p, int slot)
1282{
1283	struct ahci_cmd_hdr *hdr;
1284	struct ahci_prdt_entry *prdt;
1285	struct pci_ahci_softc *sc;
1286	uint8_t *cfis;
1287	int cfl;
1288
1289	sc = p->pr_sc;
1290	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1291	cfl = (hdr->flags & 0x1f) * 4;
1292	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1293			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1294	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1295
1296#ifdef AHCI_DEBUG
1297	DPRINTF("\ncfis:");
1298	for (i = 0; i < cfl; i++) {
1299		if (i % 10 == 0)
1300			DPRINTF("\n");
1301		DPRINTF("%02x ", cfis[i]);
1302	}
1303	DPRINTF("\n");
1304
1305	for (i = 0; i < hdr->prdtl; i++) {
1306		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1307		prdt++;
1308	}
1309#endif
1310
1311	if (cfis[0] != FIS_TYPE_REGH2D) {
1312		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1313		return;
1314	}
1315
1316	if (cfis[1] & 0x80) {
1317		ahci_handle_cmd(p, slot, cfis);
1318	} else {
1319		if (cfis[15] & (1 << 2))
1320			p->reset = 1;
1321		else if (p->reset) {
1322			p->reset = 0;
1323			ahci_port_reset(p);
1324		}
1325		p->ci &= ~(1 << slot);
1326	}
1327}
1328
1329static void
1330ahci_handle_port(struct ahci_port *p)
1331{
1332	int i;
1333
1334	if (!(p->cmd & AHCI_P_CMD_ST))
1335		return;
1336
1337	/*
1338	 * Search for any new commands to issue ignoring those that
1339	 * are already in-flight.
1340	 */
1341	for (i = 0; (i < 32) && p->ci; i++) {
1342		if ((p->ci & (1 << i)) && !(p->pending & (1 << i)))
1343			ahci_handle_slot(p, i);
1344	}
1345}
1346
1347/*
1348 * blockif callback routine - this runs in the context of the blockif
1349 * i/o thread, so the mutex needs to be acquired.
1350 */
1351static void
1352ata_ioreq_cb(struct blockif_req *br, int err)
1353{
1354	struct ahci_cmd_hdr *hdr;
1355	struct ahci_ioreq *aior;
1356	struct ahci_port *p;
1357	struct pci_ahci_softc *sc;
1358	uint32_t tfd;
1359	uint8_t *cfis;
1360	int pending, slot, ncq;
1361
1362	DPRINTF("%s %d\n", __func__, err);
1363
1364	ncq = 0;
1365	aior = br->br_param;
1366	p = aior->io_pr;
1367	cfis = aior->cfis;
1368	slot = aior->slot;
1369	pending = aior->prdtl;
1370	sc = p->pr_sc;
1371	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1372
1373	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1374			cfis[2] == ATA_READ_FPDMA_QUEUED)
1375		ncq = 1;
1376
1377	pthread_mutex_lock(&sc->mtx);
1378
1379	/*
1380	 * Move the blockif request back to the free list
1381	 */
1382	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1383
1384	if (pending && !err) {
1385		ahci_handle_dma(p, slot, cfis, aior->done,
1386		    hdr->prdtl - pending);
1387		goto out;
1388	}
1389
1390	if (!err && aior->done == aior->len) {
1391		tfd = ATA_S_READY | ATA_S_DSC;
1392		if (ncq)
1393			hdr->prdbc = 0;
1394		else
1395			hdr->prdbc = aior->len;
1396	} else {
1397		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1398		hdr->prdbc = 0;
1399		if (ncq)
1400			p->serr |= (1 << slot);
1401	}
1402
1403	/*
1404	 * This command is now complete.
1405	 */
1406	p->pending &= ~(1 << slot);
1407
1408	if (ncq) {
1409		p->sact &= ~(1 << slot);
1410		ahci_write_fis_sdb(p, slot, tfd);
1411	} else
1412		ahci_write_fis_d2h(p, slot, cfis, tfd);
1413
1414out:
1415	pthread_mutex_unlock(&sc->mtx);
1416	DPRINTF("%s exit\n", __func__);
1417}
1418
1419static void
1420atapi_ioreq_cb(struct blockif_req *br, int err)
1421{
1422	struct ahci_cmd_hdr *hdr;
1423	struct ahci_ioreq *aior;
1424	struct ahci_port *p;
1425	struct pci_ahci_softc *sc;
1426	uint8_t *cfis;
1427	uint32_t tfd;
1428	int pending, slot;
1429
1430	DPRINTF("%s %d\n", __func__, err);
1431
1432	aior = br->br_param;
1433	p = aior->io_pr;
1434	cfis = aior->cfis;
1435	slot = aior->slot;
1436	pending = aior->prdtl;
1437	sc = p->pr_sc;
1438	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1439
1440	pthread_mutex_lock(&sc->mtx);
1441
1442	/*
1443	 * Move the blockif request back to the free list
1444	 */
1445	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1446
1447	if (pending && !err) {
1448		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1449		goto out;
1450	}
1451
1452	if (!err && aior->done == aior->len) {
1453		tfd = ATA_S_READY | ATA_S_DSC;
1454		hdr->prdbc = aior->len;
1455	} else {
1456		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1457		p->asc = 0x21;
1458		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1459		hdr->prdbc = 0;
1460	}
1461
1462	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1463	ahci_write_fis_d2h(p, slot, cfis, tfd);
1464
1465out:
1466	pthread_mutex_unlock(&sc->mtx);
1467	DPRINTF("%s exit\n", __func__);
1468}
1469
1470static void
1471pci_ahci_ioreq_init(struct ahci_port *pr)
1472{
1473	struct ahci_ioreq *vr;
1474	int i;
1475
1476	pr->ioqsz = blockif_queuesz(pr->bctx);
1477	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1478	STAILQ_INIT(&pr->iofhd);
1479
1480	/*
1481	 * Add all i/o request entries to the free queue
1482	 */
1483	for (i = 0; i < pr->ioqsz; i++) {
1484		vr = &pr->ioreq[i];
1485		vr->io_pr = pr;
1486		if (!pr->atapi)
1487			vr->io_req.br_callback = ata_ioreq_cb;
1488		else
1489			vr->io_req.br_callback = atapi_ioreq_cb;
1490		vr->io_req.br_param = vr;
1491		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1492	}
1493}
1494
1495static void
1496pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1497{
1498	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1499	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1500	struct ahci_port *p = &sc->port[port];
1501
1502	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1503		port, offset, value);
1504
1505	switch (offset) {
1506	case AHCI_P_CLB:
1507		p->clb = value;
1508		break;
1509	case AHCI_P_CLBU:
1510		p->clbu = value;
1511		break;
1512	case AHCI_P_FB:
1513		p->fb = value;
1514		break;
1515	case AHCI_P_FBU:
1516		p->fbu = value;
1517		break;
1518	case AHCI_P_IS:
1519		p->is &= ~value;
1520		break;
1521	case AHCI_P_IE:
1522		p->ie = value & 0xFDC000FF;
1523		ahci_generate_intr(sc);
1524		break;
1525	case AHCI_P_CMD:
1526	{
1527		p->cmd = value;
1528
1529		if (!(value & AHCI_P_CMD_ST)) {
1530			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1531			p->ci = 0;
1532			p->sact = 0;
1533		} else {
1534			uint64_t clb;
1535
1536			p->cmd |= AHCI_P_CMD_CR;
1537			clb = (uint64_t)p->clbu << 32 | p->clb;
1538			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1539					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1540		}
1541
1542		if (value & AHCI_P_CMD_FRE) {
1543			uint64_t fb;
1544
1545			p->cmd |= AHCI_P_CMD_FR;
1546			fb = (uint64_t)p->fbu << 32 | p->fb;
1547			/* we don't support FBSCP, so rfis size is 256Bytes */
1548			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1549		} else {
1550			p->cmd &= ~AHCI_P_CMD_FR;
1551		}
1552
1553		if (value & AHCI_P_CMD_CLO) {
1554			p->tfd = 0;
1555			p->cmd &= ~AHCI_P_CMD_CLO;
1556		}
1557
1558		ahci_handle_port(p);
1559		break;
1560	}
1561	case AHCI_P_TFD:
1562	case AHCI_P_SIG:
1563	case AHCI_P_SSTS:
1564		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1565		break;
1566	case AHCI_P_SCTL:
1567		if (!(p->cmd & AHCI_P_CMD_ST)) {
1568			if (value & ATA_SC_DET_RESET)
1569				ahci_port_reset(p);
1570			p->sctl = value;
1571		}
1572		break;
1573	case AHCI_P_SERR:
1574		p->serr &= ~value;
1575		break;
1576	case AHCI_P_SACT:
1577		p->sact |= value;
1578		break;
1579	case AHCI_P_CI:
1580		p->ci |= value;
1581		ahci_handle_port(p);
1582		break;
1583	case AHCI_P_SNTF:
1584	case AHCI_P_FBS:
1585	default:
1586		break;
1587	}
1588}
1589
1590static void
1591pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1592{
1593	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1594		offset, value);
1595
1596	switch (offset) {
1597	case AHCI_CAP:
1598	case AHCI_PI:
1599	case AHCI_VS:
1600	case AHCI_CAP2:
1601		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1602		break;
1603	case AHCI_GHC:
1604		if (value & AHCI_GHC_HR)
1605			ahci_reset(sc);
1606		else if (value & AHCI_GHC_IE) {
1607			sc->ghc |= AHCI_GHC_IE;
1608			ahci_generate_intr(sc);
1609		}
1610		break;
1611	case AHCI_IS:
1612		sc->is &= ~value;
1613		ahci_generate_intr(sc);
1614		break;
1615	default:
1616		break;
1617	}
1618}
1619
1620static void
1621pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1622		int baridx, uint64_t offset, int size, uint64_t value)
1623{
1624	struct pci_ahci_softc *sc = pi->pi_arg;
1625
1626	assert(baridx == 5);
1627	assert(size == 4);
1628
1629	pthread_mutex_lock(&sc->mtx);
1630
1631	if (offset < AHCI_OFFSET)
1632		pci_ahci_host_write(sc, offset, value);
1633	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1634		pci_ahci_port_write(sc, offset, value);
1635	else
1636		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1637
1638	pthread_mutex_unlock(&sc->mtx);
1639}
1640
1641static uint64_t
1642pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1643{
1644	uint32_t value;
1645
1646	switch (offset) {
1647	case AHCI_CAP:
1648	case AHCI_GHC:
1649	case AHCI_IS:
1650	case AHCI_PI:
1651	case AHCI_VS:
1652	case AHCI_CCCC:
1653	case AHCI_CCCP:
1654	case AHCI_EM_LOC:
1655	case AHCI_EM_CTL:
1656	case AHCI_CAP2:
1657	{
1658		uint32_t *p = &sc->cap;
1659		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1660		value = *p;
1661		break;
1662	}
1663	default:
1664		value = 0;
1665		break;
1666	}
1667	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1668		offset, value);
1669
1670	return (value);
1671}
1672
1673static uint64_t
1674pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1675{
1676	uint32_t value;
1677	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1678	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1679
1680	switch (offset) {
1681	case AHCI_P_CLB:
1682	case AHCI_P_CLBU:
1683	case AHCI_P_FB:
1684	case AHCI_P_FBU:
1685	case AHCI_P_IS:
1686	case AHCI_P_IE:
1687	case AHCI_P_CMD:
1688	case AHCI_P_TFD:
1689	case AHCI_P_SIG:
1690	case AHCI_P_SSTS:
1691	case AHCI_P_SCTL:
1692	case AHCI_P_SERR:
1693	case AHCI_P_SACT:
1694	case AHCI_P_CI:
1695	case AHCI_P_SNTF:
1696	case AHCI_P_FBS:
1697	{
1698		uint32_t *p= &sc->port[port].clb;
1699		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1700		value = *p;
1701		break;
1702	}
1703	default:
1704		value = 0;
1705		break;
1706	}
1707
1708	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1709		port, offset, value);
1710
1711	return value;
1712}
1713
1714static uint64_t
1715pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1716    uint64_t offset, int size)
1717{
1718	struct pci_ahci_softc *sc = pi->pi_arg;
1719	uint32_t value;
1720
1721	assert(baridx == 5);
1722	assert(size == 4);
1723
1724	pthread_mutex_lock(&sc->mtx);
1725
1726	if (offset < AHCI_OFFSET)
1727		value = pci_ahci_host_read(sc, offset);
1728	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1729		value = pci_ahci_port_read(sc, offset);
1730	else {
1731		value = 0;
1732		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1733	}
1734
1735	pthread_mutex_unlock(&sc->mtx);
1736
1737	return (value);
1738}
1739
1740static int
1741pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1742{
1743	char bident[sizeof("XX:X:X")];
1744	struct blockif_ctxt *bctxt;
1745	struct pci_ahci_softc *sc;
1746	int ret, slots;
1747
1748	ret = 0;
1749
1750	if (opts == NULL) {
1751		fprintf(stderr, "pci_ahci: backing device required\n");
1752		return (1);
1753	}
1754
1755#ifdef AHCI_DEBUG
1756	dbg = fopen("/tmp/log", "w+");
1757#endif
1758
1759       	sc = malloc(sizeof(struct pci_ahci_softc));
1760	memset(sc, 0, sizeof(struct pci_ahci_softc));
1761	pi->pi_arg = sc;
1762	sc->asc_pi = pi;
1763	sc->ports = MAX_PORTS;
1764
1765	/*
1766	 * Only use port 0 for a backing device. All other ports will be
1767	 * marked as unused
1768	 */
1769	sc->port[0].atapi = atapi;
1770
1771	/*
1772	 * Attempt to open the backing image. Use the PCI
1773	 * slot/func for the identifier string.
1774	 */
1775	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1776	bctxt = blockif_open(opts, bident);
1777	if (bctxt == NULL) {
1778		ret = 1;
1779		goto open_fail;
1780	}
1781	sc->port[0].bctx = bctxt;
1782	sc->port[0].pr_sc = sc;
1783
1784	/*
1785	 * Allocate blockif request structures and add them
1786	 * to the free list
1787	 */
1788	pci_ahci_ioreq_init(&sc->port[0]);
1789
1790	pthread_mutex_init(&sc->mtx, NULL);
1791
1792	/* Intel ICH8 AHCI */
1793	slots = sc->port[0].ioqsz;
1794	if (slots > 32)
1795		slots = 32;
1796	--slots;
1797	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1798	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1799	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1800	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1801	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1802
1803	/* Only port 0 implemented */
1804	sc->pi = 1;
1805	sc->vs = 0x10300;
1806	sc->cap2 = AHCI_CAP2_APST;
1807	ahci_reset(sc);
1808
1809	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1810	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1811	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1812	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1813	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1814	pci_emul_add_msicap(pi, 1);
1815	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1816	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1817
1818open_fail:
1819	if (ret) {
1820		blockif_close(sc->port[0].bctx);
1821		free(sc);
1822	}
1823
1824	return (ret);
1825}
1826
/*
 * Init entry point used by the "ahci-hd" emulation: runs the common
 * AHCI initialisation with the ATAPI flag cleared.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1833
/*
 * Init entry point used by the "ahci-cd" emulation: runs the common
 * AHCI initialisation with the ATAPI flag set.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
1840
1841/*
1842 * Use separate emulation names to distinguish drive and atapi devices
1843 */
1844struct pci_devemu pci_de_ahci_hd = {
1845	.pe_emu =	"ahci-hd",
1846	.pe_init =	pci_ahci_hd_init,
1847	.pe_barwrite =	pci_ahci_write,
1848	.pe_barread =	pci_ahci_read
1849};
1850PCI_EMUL_SET(pci_de_ahci_hd);
1851
1852struct pci_devemu pci_de_ahci_cd = {
1853	.pe_emu =	"ahci-cd",
1854	.pe_init =	pci_ahci_atapi_init,
1855	.pe_barwrite =	pci_ahci_write,
1856	.pe_barread =	pci_ahci_read
1857};
1858PCI_EMUL_SET(pci_de_ahci_cd);
1859