pci_ahci.c revision 259301
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 259301 2013-12-13 06:59:18Z grehan $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 259301 2013-12-13 06:59:18Z grehan $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <inttypes.h>
52
53#include "bhyverun.h"
54#include "pci_emul.h"
55#include "ahci.h"
56#include "block_if.h"
57
/*
 * Upper bound on the number of emulated ports; it sizes the per-port state
 * array embedded in struct pci_ahci_softc.
 */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Values stored in ahci_port.sig by ahci_port_reset() when a device exists. */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
62
/*
 * SATA Frame Information Structure (FIS) type codes.
 *
 * In this file only FIS_TYPE_REGH2D is accepted from the guest
 * (ahci_handle_slot) and only REGD2H, SETDEVBITS and PIOSETUP can be
 * posted to the received-FIS area (ahci_write_fis); the remaining values
 * are listed for completeness.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
73
/*
 * SCSI opcodes
 *
 * First byte of the ATAPI command packet (located at offset 0x40 of the
 * command table).  handle_packet_cmd() dispatches on these values;
 * POSITION_TO_ELEMENT and READ_CD are defined but have no handler there
 * and therefore take the "illegal request" default path.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 *
 * Compared against the low six bits of byte 2 of a MODE SENSE(10)
 * packet in atapi_mode_sense(); any other page code is rejected.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A
96
/*
 * Debug printf
 *
 * DPRINTF() expands to nothing unless AHCI_DEBUG is defined at build
 * time; when enabled it writes to the file-local stream "dbg" and flushes
 * after every message.
 * NOTE(review): nothing in this part of the file opens "dbg" - confirm it
 * is initialised before the first DPRINTF() runs.
 * WPRINTF() is always compiled in and prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
107
/*
 * One in-flight block-backend request issued on behalf of a command slot.
 * Entries are taken from and returned to the owning port's free list
 * (ahci_port.iofhd).
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to blockif_*() */
	struct ahci_port *io_pr;	/* owning port, read by the completion callback */
	STAILQ_ENTRY(ahci_ioreq) io_list; /* free-list linkage */
	uint8_t *cfis;			/* host pointer to the slot's command table */
	uint32_t len;			/* total transfer length of the command, in bytes */
	uint32_t done;			/* bytes covered by the chunks submitted so far */
	int slot;			/* command slot this request belongs to */
	int prdtl;			/* PRDT entries still to be submitted after this chunk */
};
118
/*
 * Per-port emulation state.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL means no device attached */
	struct pci_ahci_softc *pr_sc;	/* parent controller */
	uint8_t *cmd_lst;		/* host pointer to the command list (indexed by slot) */
	uint8_t *rfis;			/* host pointer to the received-FIS area, or NULL */
	int atapi;			/* non-zero if the device is an ATAPI (CD/DVD) device */
	int reset;			/* set when a control FIS with bit 2 of byte 15 was seen */
	int mult_sectors;		/* value set by SET MULTIPLE, reported in IDENTIFY word 59 */
	uint8_t xfermode;		/* current transfer mode, reported in IDENTIFY */
	uint8_t sense_key;		/* last ATAPI sense key, returned by REQUEST SENSE */
	uint8_t asc;			/* last ATAPI additional sense code */

	/*
	 * NOTE(review): the fields below appear to mirror the AHCI per-port
	 * register block in register order - confirm against the MMIO
	 * read/write handlers, which are outside this part of the file.
	 * Within this part, "is"/"ie" are the interrupt status/enable masks,
	 * "cmd" is tested for AHCI_P_CMD_ST/FRE, "tfd" holds status (low
	 * byte) and error (next byte), and "ci"/"sact" are per-slot bitmaps.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 *
	 * iofhd is the free list that the command handlers pull requests
	 * from.  ioreq/ioqsz are presumably the backing array and its size
	 * - they are not referenced in this part of the file; verify.
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
};
156
/*
 * Guest-memory layout of one command-list entry (one per slot, spaced
 * AHCI_CL_SIZE bytes apart in ahci_port.cmd_lst).
 */
struct ahci_cmd_hdr {
	uint16_t flags;		/* low 5 bits: command FIS length in dwords */
	uint16_t prdtl;		/* number of PRDT entries in the command table */
	uint32_t prdbc;		/* byte count written back by the emulation */
	uint64_t ctba;		/* guest-physical address of the command table */
	uint32_t reserved[4];
};
164
/*
 * Guest-memory layout of one PRDT (scatter/gather) entry; the table
 * starts at offset 0x80 of the command table.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
171
/*
 * Per-controller emulation state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance (MSI delivery, VM context) */
	pthread_mutex_t	mtx;		/* taken by the blockif completion callback */
	int ports;			/* number of ports in use (<= MAX_PORTS) */
	/*
	 * NOTE(review): the fields below appear to mirror the AHCI generic
	 * host control registers - confirm against the MMIO handlers.  In
	 * this part of the file "ghc" is tested for AHCI_GHC_IE and "is"
	 * has bit N set when port N has an enabled interrupt pending.
	 */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	struct ahci_port port[MAX_PORTS];
};
/* VM context used for guest-physical to host address translation. */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
190
/*
 * Convert a logical block address into a minute/second/frame triple:
 * buf[0] = minutes, buf[1] = seconds, buf[2] = frames.  The address is
 * biased by 150 frames and there are 75 frames per second.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames = lba + 150;
	int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
198
199/*
200 * generate HBA intr depending on whether or not ports within
201 * the controller have an interrupt pending.
202 */
203static void
204ahci_generate_intr(struct pci_ahci_softc *sc)
205{
206	int i;
207
208	for (i = 0; i < sc->ports; i++) {
209		struct ahci_port *pr;
210		pr = &sc->port[i];
211		if (pr->is & pr->ie)
212			sc->is |= (1 << i);
213	}
214
215	DPRINTF("%s %x\n", __func__, sc->is);
216
217	if (sc->is && (sc->ghc & AHCI_GHC_IE))
218		pci_generate_msi(sc->asc_pi, 0);
219}
220
221static void
222ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
223{
224	int offset, len, irq;
225
226	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
227		return;
228
229	switch (ft) {
230	case FIS_TYPE_REGD2H:
231		offset = 0x40;
232		len = 20;
233		irq = AHCI_P_IX_DHR;
234		break;
235	case FIS_TYPE_SETDEVBITS:
236		offset = 0x58;
237		len = 8;
238		irq = AHCI_P_IX_SDB;
239		break;
240	case FIS_TYPE_PIOSETUP:
241		offset = 0x20;
242		len = 20;
243		irq = 0;
244		break;
245	default:
246		WPRINTF("unsupported fis type %d\n", ft);
247		return;
248	}
249	memcpy(p->rfis + offset, fis, len);
250	if (irq) {
251		p->is |= irq;
252		ahci_generate_intr(p->pr_sc);
253	}
254}
255
256static void
257ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
258{
259	uint8_t fis[8];
260	uint8_t error;
261
262	error = (tfd >> 8) & 0xff;
263	memset(fis, 0, sizeof(fis));
264	fis[0] = error;
265	fis[2] = tfd & 0x77;
266	*(uint32_t *)(fis + 4) = (1 << slot);
267	if (fis[2] & ATA_S_ERROR)
268		p->is |= AHCI_P_IX_TFE;
269	p->tfd = tfd;
270	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
271}
272
273static void
274ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
275{
276	uint8_t fis[20];
277	uint8_t error;
278
279	error = (tfd >> 8) & 0xff;
280	memset(fis, 0, sizeof(fis));
281	fis[0] = FIS_TYPE_REGD2H;
282	fis[1] = (1 << 6);
283	fis[2] = tfd & 0xff;
284	fis[3] = error;
285	fis[4] = cfis[4];
286	fis[5] = cfis[5];
287	fis[6] = cfis[6];
288	fis[7] = cfis[7];
289	fis[8] = cfis[8];
290	fis[9] = cfis[9];
291	fis[10] = cfis[10];
292	fis[11] = cfis[11];
293	fis[12] = cfis[12];
294	fis[13] = cfis[13];
295	if (fis[2] & ATA_S_ERROR)
296		p->is |= AHCI_P_IX_TFE;
297	p->tfd = tfd;
298	p->ci &= ~(1 << slot);
299	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
300}
301
302static void
303ahci_write_reset_fis_d2h(struct ahci_port *p)
304{
305	uint8_t fis[20];
306
307	memset(fis, 0, sizeof(fis));
308	fis[0] = FIS_TYPE_REGD2H;
309	fis[3] = 1;
310	fis[4] = 1;
311	if (p->atapi) {
312		fis[5] = 0x14;
313		fis[6] = 0xeb;
314	}
315	fis[12] = 1;
316	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
317}
318
319static void
320ahci_port_reset(struct ahci_port *pr)
321{
322	pr->sctl = 0;
323	pr->serr = 0;
324	pr->sact = 0;
325	pr->xfermode = ATA_UDMA6;
326	pr->mult_sectors = 128;
327
328	if (!pr->bctx) {
329		pr->ssts = ATA_SS_DET_NO_DEVICE;
330		pr->sig = 0xFFFFFFFF;
331		pr->tfd = 0x7F;
332		return;
333	}
334	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
335		ATA_SS_IPM_ACTIVE;
336	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
337	if (!pr->atapi) {
338		pr->sig = PxSIG_ATA;
339		pr->tfd |= ATA_S_READY;
340	} else
341		pr->sig = PxSIG_ATAPI;
342	ahci_write_reset_fis_d2h(pr);
343}
344
345static void
346ahci_reset(struct pci_ahci_softc *sc)
347{
348	int i;
349
350	sc->ghc = AHCI_GHC_AE;
351	sc->is = 0;
352	for (i = 0; i < sc->ports; i++) {
353		sc->port[i].ie = 0;
354		sc->port[i].is = 0;
355		ahci_port_reset(&sc->port[i]);
356	}
357}
358
/*
 * Fill a "len"-byte ATA identify string field from "src".
 *
 * Characters are stored with the two bytes of each 16-bit word swapped
 * (index i lands at i ^ 1); once "src" is exhausted the field is padded
 * with spaces.  "len" is expected to be even, since an odd length would
 * store one byte past the field.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	const char *s = src;
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*s != '\0') ? (uint8_t)*s++ : ' ';
}
371
/*
 * Fill a "len"-byte ATAPI string field from "src" in natural byte order,
 * padding with spaces once "src" is exhausted.  No terminator is stored.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	const char *s = src;
	uint8_t *out = dest;
	int left;

	for (left = len; left > 0; left--)
		*out++ = (*s != '\0') ? (uint8_t)*s++ : ' ';
}
384
385static void
386ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
387    int seek)
388{
389	struct ahci_ioreq *aior;
390	struct blockif_req *breq;
391	struct pci_ahci_softc *sc;
392	struct ahci_prdt_entry *prdt;
393	struct ahci_cmd_hdr *hdr;
394	uint64_t lba;
395	uint32_t len;
396	int i, err, iovcnt, ncq, readop;
397
398	sc = p->pr_sc;
399	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
400	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
401	ncq = 0;
402	readop = 1;
403
404	prdt += seek;
405	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
406			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
407		readop = 0;
408
409	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
410			cfis[2] == ATA_READ_FPDMA_QUEUED) {
411		lba = ((uint64_t)cfis[10] << 40) |
412			((uint64_t)cfis[9] << 32) |
413			((uint64_t)cfis[8] << 24) |
414			((uint64_t)cfis[6] << 16) |
415			((uint64_t)cfis[5] << 8) |
416			cfis[4];
417		len = cfis[11] << 8 | cfis[3];
418		if (!len)
419			len = 65536;
420		ncq = 1;
421	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
422		lba = ((uint64_t)cfis[10] << 40) |
423			((uint64_t)cfis[9] << 32) |
424			((uint64_t)cfis[8] << 24) |
425			((uint64_t)cfis[6] << 16) |
426			((uint64_t)cfis[5] << 8) |
427			cfis[4];
428		len = cfis[13] << 8 | cfis[12];
429		if (!len)
430			len = 65536;
431	} else {
432		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
433			(cfis[5] << 8) | cfis[4];
434		len = cfis[12];
435		if (!len)
436			len = 256;
437	}
438	lba *= blockif_sectsz(p->bctx);
439	len *= blockif_sectsz(p->bctx);
440
441	/*
442	 * Pull request off free list
443	 */
444	aior = STAILQ_FIRST(&p->iofhd);
445	assert(aior != NULL);
446	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
447	aior->cfis = cfis;
448	aior->slot = slot;
449	aior->len = len;
450	aior->done = done;
451	breq = &aior->io_req;
452	breq->br_offset = lba + done;
453	iovcnt = hdr->prdtl - seek;
454	if (iovcnt > BLOCKIF_IOV_MAX) {
455		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
456		iovcnt = BLOCKIF_IOV_MAX;
457	} else
458		aior->prdtl = 0;
459	breq->br_iovcnt = iovcnt;
460
461	/*
462	 * Build up the iovec based on the prdt
463	 */
464	for (i = 0; i < iovcnt; i++) {
465		uint32_t dbcsz;
466
467		dbcsz = (prdt->dbc & DBCMASK) + 1;
468		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
469		    prdt->dba, dbcsz);
470		breq->br_iov[i].iov_len = dbcsz;
471		aior->done += dbcsz;
472		prdt++;
473	}
474	if (readop)
475		err = blockif_read(p->bctx, breq);
476	else
477		err = blockif_write(p->bctx, breq);
478	assert(err == 0);
479
480	if (!aior->prdtl && ncq)
481		p->ci &= ~(1 << slot);
482}
483
484static void
485ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
486{
487	struct ahci_ioreq *aior;
488	struct blockif_req *breq;
489	int err;
490
491	/*
492	 * Pull request off free list
493	 */
494	aior = STAILQ_FIRST(&p->iofhd);
495	assert(aior != NULL);
496	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
497	aior->cfis = cfis;
498	aior->slot = slot;
499	aior->len = 0;
500	breq = &aior->io_req;
501
502	err = blockif_flush(p->bctx, breq);
503	assert(err == 0);
504}
505
506static inline void
507write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
508		void *buf, int size)
509{
510	struct ahci_cmd_hdr *hdr;
511	struct ahci_prdt_entry *prdt;
512	void *from;
513	int i, len;
514
515	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
516	len = size;
517	from = buf;
518	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
519	for (i = 0; i < hdr->prdtl && len; i++) {
520		uint8_t *ptr;
521		uint32_t dbcsz;
522
523		dbcsz = (prdt->dbc & DBCMASK) + 1;
524		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
525		memcpy(ptr, from, dbcsz);
526		len -= dbcsz;
527		from += dbcsz;
528		prdt++;
529	}
530	hdr->prdbc = size - len;
531}
532
533static void
534handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
535{
536	struct ahci_cmd_hdr *hdr;
537
538	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
539	if (p->atapi || hdr->prdtl == 0) {
540		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
541		p->is |= AHCI_P_IX_TFE;
542	} else {
543		uint16_t buf[256];
544		uint64_t sectors;
545
546		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
547		memset(buf, 0, sizeof(buf));
548		buf[0] = 0x0040;
549		/* TODO emulate different serial? */
550		ata_string((uint8_t *)(buf+10), "123456", 20);
551		ata_string((uint8_t *)(buf+23), "001", 8);
552		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
553		buf[47] = (0x8000 | 128);
554		buf[48] = 0x1;
555		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
556		buf[50] = (1 << 14);
557		buf[53] = (1 << 1 | 1 << 2);
558		if (p->mult_sectors)
559			buf[59] = (0x100 | p->mult_sectors);
560		buf[60] = sectors;
561		buf[61] = (sectors >> 16);
562		buf[63] = 0x7;
563		if (p->xfermode & ATA_WDMA0)
564			buf[63] |= (1 << ((p->xfermode & 7) + 8));
565		buf[64] = 0x3;
566		buf[65] = 100;
567		buf[66] = 100;
568		buf[67] = 100;
569		buf[68] = 100;
570		buf[75] = 31;
571		buf[76] = (1 << 8 | 1 << 2);
572		buf[80] = 0x1f0;
573		buf[81] = 0x28;
574		buf[82] = (1 << 5 | 1 << 14);
575		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
576		buf[84] = (1 << 14);
577		buf[85] = (1 << 5 | 1 << 14);
578		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
579		buf[87] = (1 << 14);
580		buf[88] = 0x7f;
581		if (p->xfermode & ATA_UDMA0)
582			buf[88] |= (1 << ((p->xfermode & 7) + 8));
583		buf[93] = (1 | 1 <<14);
584		buf[100] = sectors;
585		buf[101] = (sectors >> 16);
586		buf[102] = (sectors >> 32);
587		buf[103] = (sectors >> 48);
588		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
589		p->tfd = ATA_S_DSC | ATA_S_READY;
590		p->is |= AHCI_P_IX_DP;
591	}
592	p->ci &= ~(1 << slot);
593	ahci_generate_intr(p->pr_sc);
594}
595
596static void
597handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
598{
599	if (!p->atapi) {
600		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
601		p->is |= AHCI_P_IX_TFE;
602	} else {
603		uint16_t buf[256];
604
605		memset(buf, 0, sizeof(buf));
606		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
607		/* TODO emulate different serial? */
608		ata_string((uint8_t *)(buf+10), "123456", 20);
609		ata_string((uint8_t *)(buf+23), "001", 8);
610		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
611		buf[49] = (1 << 9 | 1 << 8);
612		buf[50] = (1 << 14 | 1);
613		buf[53] = (1 << 2 | 1 << 1);
614		buf[62] = 0x3f;
615		buf[63] = 7;
616		buf[64] = 3;
617		buf[65] = 100;
618		buf[66] = 100;
619		buf[67] = 100;
620		buf[68] = 100;
621		buf[76] = (1 << 2 | 1 << 1);
622		buf[78] = (1 << 5);
623		buf[80] = (0x1f << 4);
624		buf[82] = (1 << 4);
625		buf[83] = (1 << 14);
626		buf[84] = (1 << 14);
627		buf[85] = (1 << 4);
628		buf[87] = (1 << 14);
629		buf[88] = (1 << 14 | 0x7f);
630		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
631		p->tfd = ATA_S_DSC | ATA_S_READY;
632		p->is |= AHCI_P_IX_DHR;
633	}
634	p->ci &= ~(1 << slot);
635	ahci_generate_intr(p->pr_sc);
636}
637
638static void
639atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
640{
641	uint8_t buf[36];
642	uint8_t *acmd;
643	int len;
644
645	acmd = cfis + 0x40;
646
647	buf[0] = 0x05;
648	buf[1] = 0x80;
649	buf[2] = 0x00;
650	buf[3] = 0x21;
651	buf[4] = 31;
652	buf[5] = 0;
653	buf[6] = 0;
654	buf[7] = 0;
655	atapi_string(buf + 8, "BHYVE", 8);
656	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
657	atapi_string(buf + 32, "001", 4);
658
659	len = sizeof(buf);
660	if (len > acmd[4])
661		len = acmd[4];
662	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
663	write_prdt(p, slot, cfis, buf, len);
664	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
665}
666
667static void
668atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
669{
670	uint8_t buf[8];
671	uint64_t sectors;
672
673	sectors = blockif_size(p->bctx) / 2048;
674	be32enc(buf, sectors - 1);
675	be32enc(buf + 4, 2048);
676	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
677	write_prdt(p, slot, cfis, buf, sizeof(buf));
678	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
679}
680
/*
 * ATAPI READ TOC: build a table of contents for a single-track disc and
 * return it through the PRDT.
 *
 * The allocation length is the big-endian 16-bit value at bytes 7-8 of
 * the packet and the response format is taken from the top two bits of
 * byte 9:
 *   0 - track 1 descriptor (unless the starting track is 0xaa) followed
 *       by the 0xaa descriptor holding the disc size
 *   1 - fixed 12-byte response
 *   2 - descriptors for points 0xa0, 0xa1, 0xa2 and track 1
 *   3 - rejected with ILLEGAL REQUEST, asc 0x24
 * Bit 1 of packet byte 1 selects minute/second/frame addresses instead
 * of big-endian block numbers.
 *
 * NOTE(review): the disc size is computed as backend sectors >> 2, which
 * equals 2048-byte blocks only if the backend sector size is 512 -
 * confirm.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	int len;

	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, buf[20], *bp;

		msf = (acmd[1] >> 1) & 1;
		start_track = acmd[6];
		/* Only track 0/1 or 0xaa may be requested as starting track. */
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* Bytes 0-1 (data length) are filled in once the size is known. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		if (start_track <= 1) {
			/* Track 1 descriptor, starting at address 0. */
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* 0xaa descriptor: address is the size of the disc. */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		size = bp - buf;
		be16enc(buf, size - 2);
		/* Truncate to the guest's allocation length. */
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		uint8_t buf[12];

		/* Fixed response: data length 10, first/last fields set to 1. */
		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;
		/* start_track is read but not used for this format. */
		start_track = acmd[6];
		/* Bytes 0-1 (data length) are filled in once the size is known. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* Point 0xa0 descriptor (11 bytes). */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Point 0xa1 descriptor (11 bytes). */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Point 0xa2 descriptor: address is the size of the disc. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* Track 1 descriptor, starting at address 0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		size = bp - buf;
		be16enc(buf, size - 2);
		/* Truncate to the guest's allocation length. */
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		uint32_t tfd;

		/* Unsupported format: ILLEGAL REQUEST, asc 0x24. */
		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
861
862static void
863atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
864		uint32_t done, int seek)
865{
866	struct ahci_ioreq *aior;
867	struct ahci_cmd_hdr *hdr;
868	struct ahci_prdt_entry *prdt;
869	struct blockif_req *breq;
870	struct pci_ahci_softc *sc;
871	uint8_t *acmd;
872	uint64_t lba;
873	uint32_t len;
874	int i, err, iovcnt;
875
876	sc = p->pr_sc;
877	acmd = cfis + 0x40;
878	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
879	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
880
881	prdt += seek;
882	lba = be32dec(acmd + 2);
883	if (acmd[0] == READ_10)
884		len = be16dec(acmd + 7);
885	else
886		len = be32dec(acmd + 6);
887	if (len == 0) {
888		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
889		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
890	}
891	lba *= 2048;
892	len *= 2048;
893
894	/*
895	 * Pull request off free list
896	 */
897	aior = STAILQ_FIRST(&p->iofhd);
898	assert(aior != NULL);
899	STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
900	aior->cfis = cfis;
901	aior->slot = slot;
902	aior->len = len;
903	aior->done = done;
904	breq = &aior->io_req;
905	breq->br_offset = lba + done;
906	iovcnt = hdr->prdtl - seek;
907	if (iovcnt > BLOCKIF_IOV_MAX) {
908		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
909		iovcnt = BLOCKIF_IOV_MAX;
910	} else
911		aior->prdtl = 0;
912	breq->br_iovcnt = iovcnt;
913
914	/*
915	 * Build up the iovec based on the prdt
916	 */
917	for (i = 0; i < iovcnt; i++) {
918		uint32_t dbcsz;
919
920		dbcsz = (prdt->dbc & DBCMASK) + 1;
921		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
922		    prdt->dba, dbcsz);
923		breq->br_iov[i].iov_len = dbcsz;
924		aior->done += dbcsz;
925		prdt++;
926	}
927	err = blockif_read(p->bctx, breq);
928	assert(err == 0);
929}
930
931static void
932atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
933{
934	uint8_t buf[64];
935	uint8_t *acmd;
936	int len;
937
938	acmd = cfis + 0x40;
939	len = acmd[4];
940	if (len > sizeof(buf))
941		len = sizeof(buf);
942	memset(buf, 0, len);
943	buf[0] = 0x70 | (1 << 7);
944	buf[2] = p->sense_key;
945	buf[7] = 10;
946	buf[12] = p->asc;
947	write_prdt(p, slot, cfis, buf, len);
948	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
949	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
950}
951
952static void
953atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
954{
955	uint8_t *acmd = cfis + 0x40;
956	uint32_t tfd;
957
958	switch (acmd[4] & 3) {
959	case 0:
960	case 1:
961	case 3:
962		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
963		tfd = ATA_S_READY | ATA_S_DSC;
964		break;
965	case 2:
966		/* TODO eject media */
967		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
968		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
969		p->asc = 0x53;
970		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
971		break;
972	}
973	ahci_write_fis_d2h(p, slot, cfis, tfd);
974}
975
976static void
977atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
978{
979	uint8_t *acmd;
980	uint32_t tfd;
981	uint8_t pc, code;
982	int len;
983
984	acmd = cfis + 0x40;
985	len = be16dec(acmd + 7);
986	pc = acmd[2] >> 6;
987	code = acmd[2] & 0x3f;
988
989	switch (pc) {
990	case 0:
991		switch (code) {
992		case MODEPAGE_RW_ERROR_RECOVERY:
993		{
994			uint8_t buf[16];
995
996			if (len > sizeof(buf))
997				len = sizeof(buf);
998
999			memset(buf, 0, sizeof(buf));
1000			be16enc(buf, 16 - 2);
1001			buf[2] = 0x70;
1002			buf[8] = 0x01;
1003			buf[9] = 16 - 10;
1004			buf[11] = 0x05;
1005			write_prdt(p, slot, cfis, buf, len);
1006			tfd = ATA_S_READY | ATA_S_DSC;
1007			break;
1008		}
1009		case MODEPAGE_CD_CAPABILITIES:
1010		{
1011			uint8_t buf[30];
1012
1013			if (len > sizeof(buf))
1014				len = sizeof(buf);
1015
1016			memset(buf, 0, sizeof(buf));
1017			be16enc(buf, 30 - 2);
1018			buf[2] = 0x70;
1019			buf[8] = 0x2A;
1020			buf[9] = 30 - 10;
1021			buf[10] = 0x08;
1022			buf[12] = 0x71;
1023			be16enc(&buf[18], 2);
1024			be16enc(&buf[20], 512);
1025			write_prdt(p, slot, cfis, buf, len);
1026			tfd = ATA_S_READY | ATA_S_DSC;
1027			break;
1028		}
1029		default:
1030			goto error;
1031			break;
1032		}
1033		break;
1034	case 3:
1035		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1036		p->asc = 0x39;
1037		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1038		break;
1039error:
1040	case 1:
1041	case 2:
1042		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1043		p->asc = 0x24;
1044		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1045		break;
1046	}
1047	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1048	ahci_write_fis_d2h(p, slot, cfis, tfd);
1049}
1050
1051static void
1052atapi_get_event_status_notification(struct ahci_port *p, int slot,
1053    uint8_t *cfis)
1054{
1055	uint8_t *acmd;
1056	uint32_t tfd;
1057
1058	acmd = cfis + 0x40;
1059
1060	/* we don't support asynchronous operation */
1061	if (!(acmd[1] & 1)) {
1062		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1063		p->asc = 0x24;
1064		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1065	} else {
1066		uint8_t buf[8];
1067		int len;
1068
1069		len = be16dec(acmd + 7);
1070		if (len > sizeof(buf))
1071			len = sizeof(buf);
1072
1073		memset(buf, 0, sizeof(buf));
1074		be16enc(buf, 8 - 2);
1075		buf[2] = 0x04;
1076		buf[3] = 0x10;
1077		buf[5] = 0x02;
1078		write_prdt(p, slot, cfis, buf, len);
1079		tfd = ATA_S_READY | ATA_S_DSC;
1080	}
1081	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1082	ahci_write_fis_d2h(p, slot, cfis, tfd);
1083}
1084
1085static void
1086handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1087{
1088	uint8_t *acmd;
1089
1090	acmd = cfis + 0x40;
1091
1092#ifdef AHCI_DEBUG
1093	{
1094		int i;
1095		DPRINTF("ACMD:");
1096		for (i = 0; i < 16; i++)
1097			DPRINTF("%02x ", acmd[i]);
1098		DPRINTF("\n");
1099	}
1100#endif
1101
1102	switch (acmd[0]) {
1103	case TEST_UNIT_READY:
1104		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1105		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1106		break;
1107	case INQUIRY:
1108		atapi_inquiry(p, slot, cfis);
1109		break;
1110	case READ_CAPACITY:
1111		atapi_read_capacity(p, slot, cfis);
1112		break;
1113	case PREVENT_ALLOW:
1114		/* TODO */
1115		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1116		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1117		break;
1118	case READ_TOC:
1119		atapi_read_toc(p, slot, cfis);
1120		break;
1121	case READ_10:
1122	case READ_12:
1123		atapi_read(p, slot, cfis, 0, 0);
1124		break;
1125	case REQUEST_SENSE:
1126		atapi_request_sense(p, slot, cfis);
1127		break;
1128	case START_STOP_UNIT:
1129		atapi_start_stop_unit(p, slot, cfis);
1130		break;
1131	case MODE_SENSE_10:
1132		atapi_mode_sense(p, slot, cfis);
1133		break;
1134	case GET_EVENT_STATUS_NOTIFICATION:
1135		atapi_get_event_status_notification(p, slot, cfis);
1136		break;
1137	default:
1138		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1139		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1140		p->asc = 0x20;
1141		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1142				ATA_S_READY | ATA_S_ERROR);
1143		break;
1144	}
1145}
1146
1147static void
1148ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1149{
1150
1151	switch (cfis[2]) {
1152	case ATA_ATA_IDENTIFY:
1153		handle_identify(p, slot, cfis);
1154		break;
1155	case ATA_SETFEATURES:
1156	{
1157		switch (cfis[3]) {
1158		case ATA_SF_ENAB_WCACHE:
1159		case ATA_SF_DIS_WCACHE:
1160		case ATA_SF_ENAB_RCACHE:
1161		case ATA_SF_DIS_RCACHE:
1162			p->tfd = ATA_S_DSC | ATA_S_READY;
1163			break;
1164		case ATA_SF_SETXFER:
1165		{
1166			switch (cfis[12] & 0xf8) {
1167			case ATA_PIO:
1168			case ATA_PIO0:
1169				break;
1170			case ATA_WDMA0:
1171			case ATA_UDMA0:
1172				p->xfermode = (cfis[12] & 0x7);
1173				break;
1174			}
1175			p->tfd = ATA_S_DSC | ATA_S_READY;
1176			break;
1177		}
1178		default:
1179			p->tfd = ATA_S_ERROR | ATA_S_READY;
1180			p->tfd |= (ATA_ERROR_ABORT << 8);
1181			break;
1182		}
1183		p->is |= AHCI_P_IX_DP;
1184		p->ci &= ~(1 << slot);
1185		ahci_generate_intr(p->pr_sc);
1186		break;
1187	}
1188	case ATA_SET_MULTI:
1189		if (cfis[12] != 0 &&
1190			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1191			p->tfd = ATA_S_ERROR | ATA_S_READY;
1192			p->tfd |= (ATA_ERROR_ABORT << 8);
1193		} else {
1194			p->mult_sectors = cfis[12];
1195			p->tfd = ATA_S_DSC | ATA_S_READY;
1196		}
1197		p->is |= AHCI_P_IX_DP;
1198		p->ci &= ~(1 << slot);
1199		ahci_generate_intr(p->pr_sc);
1200		break;
1201	case ATA_READ_DMA:
1202	case ATA_WRITE_DMA:
1203	case ATA_READ_DMA48:
1204	case ATA_WRITE_DMA48:
1205	case ATA_READ_FPDMA_QUEUED:
1206	case ATA_WRITE_FPDMA_QUEUED:
1207		ahci_handle_dma(p, slot, cfis, 0, 0);
1208		break;
1209	case ATA_FLUSHCACHE:
1210	case ATA_FLUSHCACHE48:
1211		ahci_handle_flush(p, slot, cfis);
1212		break;
1213	case ATA_STANDBY_CMD:
1214		break;
1215	case ATA_NOP:
1216	case ATA_STANDBY_IMMEDIATE:
1217	case ATA_IDLE_IMMEDIATE:
1218	case ATA_SLEEP:
1219		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1220		break;
1221	case ATA_ATAPI_IDENTIFY:
1222		handle_atapi_identify(p, slot, cfis);
1223		break;
1224	case ATA_PACKET_CMD:
1225		if (!p->atapi) {
1226			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1227			p->is |= AHCI_P_IX_TFE;
1228			p->ci &= ~(1 << slot);
1229			ahci_generate_intr(p->pr_sc);
1230		} else
1231			handle_packet_cmd(p, slot, cfis);
1232		break;
1233	default:
1234		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1235		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1236		p->is |= AHCI_P_IX_TFE;
1237		p->ci &= ~(1 << slot);
1238		ahci_generate_intr(p->pr_sc);
1239		break;
1240	}
1241}
1242
1243static void
1244ahci_handle_slot(struct ahci_port *p, int slot)
1245{
1246	struct ahci_cmd_hdr *hdr;
1247	struct ahci_prdt_entry *prdt;
1248	struct pci_ahci_softc *sc;
1249	uint8_t *cfis;
1250	int cfl;
1251
1252	sc = p->pr_sc;
1253	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1254	cfl = (hdr->flags & 0x1f) * 4;
1255	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1256			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1257	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1258
1259#ifdef AHCI_DEBUG
1260	DPRINTF("\ncfis:");
1261	for (i = 0; i < cfl; i++) {
1262		if (i % 10 == 0)
1263			DPRINTF("\n");
1264		DPRINTF("%02x ", cfis[i]);
1265	}
1266	DPRINTF("\n");
1267
1268	for (i = 0; i < hdr->prdtl; i++) {
1269		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1270		prdt++;
1271	}
1272#endif
1273
1274	if (cfis[0] != FIS_TYPE_REGH2D) {
1275		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1276		return;
1277	}
1278
1279	if (cfis[1] & 0x80) {
1280		ahci_handle_cmd(p, slot, cfis);
1281	} else {
1282		if (cfis[15] & (1 << 2))
1283			p->reset = 1;
1284		else if (p->reset) {
1285			p->reset = 0;
1286			ahci_port_reset(p);
1287		}
1288		p->ci &= ~(1 << slot);
1289	}
1290}
1291
1292static void
1293ahci_handle_port(struct ahci_port *p)
1294{
1295	int i;
1296
1297	if (!(p->cmd & AHCI_P_CMD_ST))
1298		return;
1299
1300	for (i = 0; (i < 32) && p->ci; i++) {
1301		if (p->ci & (1 << i))
1302			ahci_handle_slot(p, i);
1303	}
1304}
1305
1306/*
1307 * blockif callback routine - this runs in the context of the blockif
1308 * i/o thread, so the mutex needs to be acquired.
1309 */
1310static void
1311ata_ioreq_cb(struct blockif_req *br, int err)
1312{
1313	struct ahci_cmd_hdr *hdr;
1314	struct ahci_ioreq *aior;
1315	struct ahci_port *p;
1316	struct pci_ahci_softc *sc;
1317	uint32_t tfd;
1318	uint8_t *cfis;
1319	int pending, slot, ncq;
1320
1321	DPRINTF("%s %d\n", __func__, err);
1322
1323	ncq = 0;
1324	aior = br->br_param;
1325	p = aior->io_pr;
1326	cfis = aior->cfis;
1327	slot = aior->slot;
1328	pending = aior->prdtl;
1329	sc = p->pr_sc;
1330	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1331
1332	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1333			cfis[2] == ATA_READ_FPDMA_QUEUED)
1334		ncq = 1;
1335
1336	pthread_mutex_lock(&sc->mtx);
1337
1338	/*
1339	 * Move the blockif request back to the free list
1340	 */
1341	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1342
1343	if (pending && !err) {
1344		ahci_handle_dma(p, slot, cfis, aior->done,
1345		    hdr->prdtl - pending);
1346		goto out;
1347	}
1348
1349	if (!err && aior->done == aior->len) {
1350		tfd = ATA_S_READY | ATA_S_DSC;
1351		if (ncq)
1352			hdr->prdbc = 0;
1353		else
1354			hdr->prdbc = aior->len;
1355	} else {
1356		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1357		hdr->prdbc = 0;
1358		if (ncq)
1359			p->serr |= (1 << slot);
1360	}
1361
1362	if (ncq) {
1363		p->sact &= ~(1 << slot);
1364		ahci_write_fis_sdb(p, slot, tfd);
1365	} else
1366		ahci_write_fis_d2h(p, slot, cfis, tfd);
1367
1368out:
1369	pthread_mutex_unlock(&sc->mtx);
1370	DPRINTF("%s exit\n", __func__);
1371}
1372
1373static void
1374atapi_ioreq_cb(struct blockif_req *br, int err)
1375{
1376	struct ahci_cmd_hdr *hdr;
1377	struct ahci_ioreq *aior;
1378	struct ahci_port *p;
1379	struct pci_ahci_softc *sc;
1380	uint8_t *cfis;
1381	uint32_t tfd;
1382	int pending, slot;
1383
1384	DPRINTF("%s %d\n", __func__, err);
1385
1386	aior = br->br_param;
1387	p = aior->io_pr;
1388	cfis = aior->cfis;
1389	slot = aior->slot;
1390	pending = aior->prdtl;
1391	sc = p->pr_sc;
1392	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1393
1394	pthread_mutex_lock(&sc->mtx);
1395
1396	/*
1397	 * Move the blockif request back to the free list
1398	 */
1399	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
1400
1401	if (pending && !err) {
1402		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1403		goto out;
1404	}
1405
1406	if (!err && aior->done == aior->len) {
1407		tfd = ATA_S_READY | ATA_S_DSC;
1408		hdr->prdbc = aior->len;
1409	} else {
1410		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1411		p->asc = 0x21;
1412		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1413		hdr->prdbc = 0;
1414	}
1415
1416	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1417	ahci_write_fis_d2h(p, slot, cfis, tfd);
1418
1419out:
1420	pthread_mutex_unlock(&sc->mtx);
1421	DPRINTF("%s exit\n", __func__);
1422}
1423
1424static void
1425pci_ahci_ioreq_init(struct ahci_port *pr)
1426{
1427	struct ahci_ioreq *vr;
1428	int i;
1429
1430	pr->ioqsz = blockif_queuesz(pr->bctx);
1431	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1432	STAILQ_INIT(&pr->iofhd);
1433
1434	/*
1435	 * Add all i/o request entries to the free queue
1436	 */
1437	for (i = 0; i < pr->ioqsz; i++) {
1438		vr = &pr->ioreq[i];
1439		vr->io_pr = pr;
1440		if (!pr->atapi)
1441			vr->io_req.br_callback = ata_ioreq_cb;
1442		else
1443			vr->io_req.br_callback = atapi_ioreq_cb;
1444		vr->io_req.br_param = vr;
1445		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
1446	}
1447}
1448
1449static void
1450pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1451{
1452	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1453	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1454	struct ahci_port *p = &sc->port[port];
1455
1456	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1457		port, offset, value);
1458
1459	switch (offset) {
1460	case AHCI_P_CLB:
1461		p->clb = value;
1462		break;
1463	case AHCI_P_CLBU:
1464		p->clbu = value;
1465		break;
1466	case AHCI_P_FB:
1467		p->fb = value;
1468		break;
1469	case AHCI_P_FBU:
1470		p->fbu = value;
1471		break;
1472	case AHCI_P_IS:
1473		p->is &= ~value;
1474		break;
1475	case AHCI_P_IE:
1476		p->ie = value & 0xFDC000FF;
1477		ahci_generate_intr(sc);
1478		break;
1479	case AHCI_P_CMD:
1480	{
1481		p->cmd = value;
1482
1483		if (!(value & AHCI_P_CMD_ST)) {
1484			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
1485			p->ci = 0;
1486			p->sact = 0;
1487		} else {
1488			uint64_t clb;
1489
1490			p->cmd |= AHCI_P_CMD_CR;
1491			clb = (uint64_t)p->clbu << 32 | p->clb;
1492			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1493					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1494		}
1495
1496		if (value & AHCI_P_CMD_FRE) {
1497			uint64_t fb;
1498
1499			p->cmd |= AHCI_P_CMD_FR;
1500			fb = (uint64_t)p->fbu << 32 | p->fb;
1501			/* we don't support FBSCP, so rfis size is 256Bytes */
1502			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1503		} else {
1504			p->cmd &= ~AHCI_P_CMD_FR;
1505		}
1506
1507		if (value & AHCI_P_CMD_CLO) {
1508			p->tfd = 0;
1509			p->cmd &= ~AHCI_P_CMD_CLO;
1510		}
1511
1512		ahci_handle_port(p);
1513		break;
1514	}
1515	case AHCI_P_TFD:
1516	case AHCI_P_SIG:
1517	case AHCI_P_SSTS:
1518		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1519		break;
1520	case AHCI_P_SCTL:
1521		if (!(p->cmd & AHCI_P_CMD_ST)) {
1522			if (value & ATA_SC_DET_RESET)
1523				ahci_port_reset(p);
1524			p->sctl = value;
1525		}
1526		break;
1527	case AHCI_P_SERR:
1528		p->serr &= ~value;
1529		break;
1530	case AHCI_P_SACT:
1531		p->sact |= value;
1532		break;
1533	case AHCI_P_CI:
1534		p->ci |= value;
1535		ahci_handle_port(p);
1536		break;
1537	case AHCI_P_SNTF:
1538	case AHCI_P_FBS:
1539	default:
1540		break;
1541	}
1542}
1543
1544static void
1545pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1546{
1547	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1548		offset, value);
1549
1550	switch (offset) {
1551	case AHCI_CAP:
1552	case AHCI_PI:
1553	case AHCI_VS:
1554	case AHCI_CAP2:
1555		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1556		break;
1557	case AHCI_GHC:
1558		if (value & AHCI_GHC_HR)
1559			ahci_reset(sc);
1560		else if (value & AHCI_GHC_IE) {
1561			sc->ghc |= AHCI_GHC_IE;
1562			ahci_generate_intr(sc);
1563		}
1564		break;
1565	case AHCI_IS:
1566		sc->is &= ~value;
1567		ahci_generate_intr(sc);
1568		break;
1569	default:
1570		break;
1571	}
1572}
1573
1574static void
1575pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1576		int baridx, uint64_t offset, int size, uint64_t value)
1577{
1578	struct pci_ahci_softc *sc = pi->pi_arg;
1579
1580	assert(baridx == 5);
1581	assert(size == 4);
1582
1583	pthread_mutex_lock(&sc->mtx);
1584
1585	if (offset < AHCI_OFFSET)
1586		pci_ahci_host_write(sc, offset, value);
1587	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1588		pci_ahci_port_write(sc, offset, value);
1589	else
1590		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1591
1592	pthread_mutex_unlock(&sc->mtx);
1593}
1594
1595static uint64_t
1596pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1597{
1598	uint32_t value;
1599
1600	switch (offset) {
1601	case AHCI_CAP:
1602	case AHCI_GHC:
1603	case AHCI_IS:
1604	case AHCI_PI:
1605	case AHCI_VS:
1606	case AHCI_CCCC:
1607	case AHCI_CCCP:
1608	case AHCI_EM_LOC:
1609	case AHCI_EM_CTL:
1610	case AHCI_CAP2:
1611	{
1612		uint32_t *p = &sc->cap;
1613		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1614		value = *p;
1615		break;
1616	}
1617	default:
1618		value = 0;
1619		break;
1620	}
1621	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1622		offset, value);
1623
1624	return (value);
1625}
1626
1627static uint64_t
1628pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1629{
1630	uint32_t value;
1631	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1632	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1633
1634	switch (offset) {
1635	case AHCI_P_CLB:
1636	case AHCI_P_CLBU:
1637	case AHCI_P_FB:
1638	case AHCI_P_FBU:
1639	case AHCI_P_IS:
1640	case AHCI_P_IE:
1641	case AHCI_P_CMD:
1642	case AHCI_P_TFD:
1643	case AHCI_P_SIG:
1644	case AHCI_P_SSTS:
1645	case AHCI_P_SCTL:
1646	case AHCI_P_SERR:
1647	case AHCI_P_SACT:
1648	case AHCI_P_CI:
1649	case AHCI_P_SNTF:
1650	case AHCI_P_FBS:
1651	{
1652		uint32_t *p= &sc->port[port].clb;
1653		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1654		value = *p;
1655		break;
1656	}
1657	default:
1658		value = 0;
1659		break;
1660	}
1661
1662	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1663		port, offset, value);
1664
1665	return value;
1666}
1667
1668static uint64_t
1669pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1670    uint64_t offset, int size)
1671{
1672	struct pci_ahci_softc *sc = pi->pi_arg;
1673	uint32_t value;
1674
1675	assert(baridx == 5);
1676	assert(size == 4);
1677
1678	pthread_mutex_lock(&sc->mtx);
1679
1680	if (offset < AHCI_OFFSET)
1681		value = pci_ahci_host_read(sc, offset);
1682	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1683		value = pci_ahci_port_read(sc, offset);
1684	else {
1685		value = 0;
1686		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1687	}
1688
1689	pthread_mutex_unlock(&sc->mtx);
1690
1691	return (value);
1692}
1693
1694static int
1695pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1696{
1697	char bident[sizeof("XX:X:X")];
1698	struct blockif_ctxt *bctxt;
1699	struct pci_ahci_softc *sc;
1700	int ret, slots;
1701
1702	ret = 0;
1703
1704	if (opts == NULL) {
1705		fprintf(stderr, "pci_ahci: backing device required\n");
1706		return (1);
1707	}
1708
1709#ifdef AHCI_DEBUG
1710	dbg = fopen("/tmp/log", "w+");
1711#endif
1712
1713       	sc = malloc(sizeof(struct pci_ahci_softc));
1714	memset(sc, 0, sizeof(struct pci_ahci_softc));
1715	pi->pi_arg = sc;
1716	sc->asc_pi = pi;
1717	sc->ports = MAX_PORTS;
1718
1719	/*
1720	 * Only use port 0 for a backing device. All other ports will be
1721	 * marked as unused
1722	 */
1723	sc->port[0].atapi = atapi;
1724
1725	/*
1726	 * Attempt to open the backing image. Use the PCI
1727	 * slot/func for the identifier string.
1728	 */
1729	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1730	bctxt = blockif_open(opts, bident);
1731	if (bctxt == NULL) {
1732		ret = 1;
1733		goto open_fail;
1734	}
1735	sc->port[0].bctx = bctxt;
1736	sc->port[0].pr_sc = sc;
1737
1738	/*
1739	 * Allocate blockif request structures and add them
1740	 * to the free list
1741	 */
1742	pci_ahci_ioreq_init(&sc->port[0]);
1743
1744	pthread_mutex_init(&sc->mtx, NULL);
1745
1746	/* Intel ICH8 AHCI */
1747	slots = sc->port[0].ioqsz;
1748	if (slots > 32)
1749		slots = 32;
1750	--slots;
1751	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1752	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1753	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1754	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1755	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1756
1757	/* Only port 0 implemented */
1758	sc->pi = 1;
1759	sc->vs = 0x10300;
1760	sc->cap2 = AHCI_CAP2_APST;
1761	ahci_reset(sc);
1762
1763	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1764	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1765	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1766	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1767	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1768	pci_emul_add_msicap(pi, 1);
1769	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1770	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1771
1772open_fail:
1773	if (ret) {
1774		blockif_close(sc->port[0].bctx);
1775		free(sc);
1776	}
1777
1778	return (ret);
1779}
1780
/*
 * pe_init hook for the "ahci-hd" emulation: common AHCI setup with the
 * ATAPI flag off.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1787
/*
 * pe_init hook for the "ahci-cd" emulation: common AHCI setup with the
 * ATAPI flag on.  Returns whatever pci_ahci_init() returns.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1794
1795/*
1796 * Use separate emulation names to distinguish drive and atapi devices
1797 */
1798struct pci_devemu pci_de_ahci_hd = {
1799	.pe_emu =	"ahci-hd",
1800	.pe_init =	pci_ahci_hd_init,
1801	.pe_barwrite =	pci_ahci_write,
1802	.pe_barread =	pci_ahci_read
1803};
1804PCI_EMUL_SET(pci_de_ahci_hd);
1805
1806struct pci_devemu pci_de_ahci_cd = {
1807	.pe_emu =	"ahci-cd",
1808	.pe_init =	pci_ahci_atapi_init,
1809	.pe_barwrite =	pci_ahci_write,
1810	.pe_barread =	pci_ahci_read
1811};
1812PCI_EMUL_SET(pci_de_ahci_cd);
1813