pci_ahci.c revision 280738
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 280738 2015-03-27 08:47:54Z mav $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 280738 2015-03-27 08:47:54Z mav $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <pthread_np.h>
52#include <inttypes.h>
53
54#include "bhyverun.h"
55#include "pci_emul.h"
56#include "ahci.h"
57#include "block_if.h"
58
/*
 * Upper bound on the number of ports; sizes the port[] array in the
 * controller softc.  Intel ICH8 AHCI supports 6 ports.
 */
#define	MAX_PORTS	6

/*
 * Values loaded into a port's signature (sig) field by ahci_port_reset().
 */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
63
/*
 * SATA FIS type codes.  The value is what is placed in byte 0 of a FIS
 * built by the ahci_write_fis_*() helpers.
 */
enum sata_fis_type {
	/* Register FIS - host to device */
	FIS_TYPE_REGH2D		= 0x27,
	/* Register FIS - device to host */
	FIS_TYPE_REGD2H		= 0x34,
	/* DMA activate FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,
	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DMASETUP	= 0x41,
	/* Data FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,
	/* BIST activate FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,
	/* PIO setup FIS - device to host */
	FIS_TYPE_PIOSETUP	= 0x5F,
	/* Set device bits FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,
};
74
/*
 * SCSI opcodes, as found in byte 0 of the ATAPI packet.
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	READ_12				0xA8
#define	READ_CD				0xBE

/*
 * SCSI mode page codes (low six bits of packet byte 2 for MODE SENSE).
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands.
 * NOTE(review): judging by the ATA_SF_ prefix these look like SET FEATURES
 * subcommand codes, with ATA_SATA_SF_AN a SATA feature selector - confirm
 * against <sys/ata.h> and the users further down the file.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
104
/*
 * Debug printf.
 *
 * DPRINTF() writes to the "dbg" stream and flushes it after every message
 * when AHCI_DEBUG is defined; otherwise it expands to nothing.  WPRINTF()
 * always prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(format, arg...)			\
	do {					\
		fprintf(dbg, format, ##arg);	\
		fflush(dbg);			\
	} while (0)
#else
#define	DPRINTF(format, arg...)
#endif
#define	WPRINTF(format, arg...)	printf(format, ##arg)
115
116struct ahci_ioreq {
117	struct blockif_req io_req;
118	struct ahci_port *io_pr;
119	STAILQ_ENTRY(ahci_ioreq) io_flist;
120	TAILQ_ENTRY(ahci_ioreq) io_blist;
121	uint8_t *cfis;
122	uint32_t len;
123	uint32_t done;
124	int slot;
125	int prdtl;
126};
127
/*
 * Per-port emulation state.
 */
struct ahci_port {
	/* Backing block device; NULL means no device is attached. */
	struct blockif_ctxt *bctx;
	/* Owning controller. */
	struct pci_ahci_softc *pr_sc;
	/* Host mapping of the command list (headers are AHCI_CL_SIZE apart). */
	uint8_t *cmd_lst;
	/* Host mapping of the received-FIS area; NULL until mapped. */
	uint8_t *rfis;
	/* Non-zero for an ATAPI device, zero for an ATA disk. */
	int atapi;
	/* NOTE(review): not referenced in the code visible here. */
	int reset;
	/* Set to 128 on port reset; reported in IDENTIFY word 59. */
	int mult_sectors;
	/* Current transfer mode; ATA_UDMA6 after port reset. */
	uint8_t xfermode;
	/* Snapshot of the failing command, returned by the read-log handler. */
	uint8_t err_cfis[20];
	/* ATAPI sense data reported by REQUEST SENSE. */
	uint8_t sense_key;
	uint8_t asc;
	/* Bitmap of command slots with a block request in flight. */
	uint32_t pending;

	/*
	 * Port register values.
	 * NOTE(review): the order looks like the AHCI per-port register
	 * layout and may be relied on by register access code outside this
	 * view - do not reorder without checking.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info: the request pool (ioreq/ioqsz are not used in
	 * the code visible here), the free list and the busy list.
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
};
168
/*
 * Command header: one entry of a port's command list, located at
 * cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	/* Number of PRDT entries that follow the command FIS. */
	uint16_t prdtl;
	/* Byte count transferred; filled in by write_prdt(). */
	uint32_t prdbc;
	uint64_t ctba;
	uint32_t reserved[4];
};
176
/* Bits of ahci_prdt_entry.dbc that hold the byte count minus one. */
#define	DBCMASK		0x3fffff

/*
 * Physical region descriptor: one guest buffer of a command's
 * scatter/gather table, which starts 0x80 bytes after the command FIS.
 */
struct ahci_prdt_entry {
	/* Guest-physical address of the buffer. */
	uint64_t dba;
	uint32_t reserved;
	/* (dbc & DBCMASK) + 1 is the buffer length in bytes. */
	uint32_t dbc;
};
183
184struct pci_ahci_softc {
185	struct pci_devinst *asc_pi;
186	pthread_mutex_t	mtx;
187	int ports;
188	uint32_t cap;
189	uint32_t ghc;
190	uint32_t is;
191	uint32_t pi;
192	uint32_t vs;
193	uint32_t ccc_ctl;
194	uint32_t ccc_pts;
195	uint32_t em_loc;
196	uint32_t em_ctl;
197	uint32_t cap2;
198	uint32_t bohc;
199	uint32_t lintr;
200	struct ahci_port port[MAX_PORTS];
201};
202#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
203
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * 150 frames are added to the LBA first; the result is then split at
 * 75 frames per second and 60 seconds per minute and stored as
 * buf[0] = minute, buf[1] = second, buf[2] = frame.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
211
212/*
213 * generate HBA intr depending on whether or not ports within
214 * the controller have an interrupt pending.
215 */
216static void
217ahci_generate_intr(struct pci_ahci_softc *sc)
218{
219	struct pci_devinst *pi;
220	int i;
221
222	pi = sc->asc_pi;
223
224	for (i = 0; i < sc->ports; i++) {
225		struct ahci_port *pr;
226		pr = &sc->port[i];
227		if (pr->is & pr->ie)
228			sc->is |= (1 << i);
229	}
230
231	DPRINTF("%s %x\n", __func__, sc->is);
232
233	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
234		if (pci_msi_enabled(pi)) {
235			/*
236			 * Generate an MSI interrupt on every edge
237			 */
238			pci_generate_msi(pi, 0);
239		} else if (!sc->lintr) {
240			/*
241			 * Only generate a pin-based interrupt if one wasn't
242			 * in progress
243			 */
244			sc->lintr = 1;
245			pci_lintr_assert(pi);
246		}
247	} else if (sc->lintr) {
248		/*
249		 * No interrupts: deassert pin-based signal if it had
250		 * been asserted
251		 */
252		pci_lintr_deassert(pi);
253		sc->lintr = 0;
254	}
255}
256
257static void
258ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
259{
260	int offset, len, irq;
261
262	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
263		return;
264
265	switch (ft) {
266	case FIS_TYPE_REGD2H:
267		offset = 0x40;
268		len = 20;
269		irq = AHCI_P_IX_DHR;
270		break;
271	case FIS_TYPE_SETDEVBITS:
272		offset = 0x58;
273		len = 8;
274		irq = AHCI_P_IX_SDB;
275		break;
276	case FIS_TYPE_PIOSETUP:
277		offset = 0x20;
278		len = 20;
279		irq = 0;
280		break;
281	default:
282		WPRINTF("unsupported fis type %d\n", ft);
283		return;
284	}
285	memcpy(p->rfis + offset, fis, len);
286	if (irq) {
287		p->is |= irq;
288		ahci_generate_intr(p->pr_sc);
289	}
290}
291
292static void
293ahci_write_fis_piosetup(struct ahci_port *p)
294{
295	uint8_t fis[20];
296
297	memset(fis, 0, sizeof(fis));
298	fis[0] = FIS_TYPE_PIOSETUP;
299	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
300}
301
302static void
303ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
304{
305	uint8_t fis[8];
306	uint8_t error;
307
308	error = (tfd >> 8) & 0xff;
309	memset(fis, 0, sizeof(fis));
310	fis[0] = FIS_TYPE_SETDEVBITS;
311	fis[1] = (1 << 6);
312	fis[2] = tfd & 0x77;
313	fis[3] = error;
314	if (fis[2] & ATA_S_ERROR) {
315		p->is |= AHCI_P_IX_TFE;
316		p->err_cfis[0] = slot;
317		p->err_cfis[2] = tfd & 0x77;
318		p->err_cfis[3] = error;
319		memcpy(&p->err_cfis[4], cfis + 4, 16);
320	} else {
321		*(uint32_t *)(fis + 4) = (1 << slot);
322		p->sact &= ~(1 << slot);
323	}
324	p->tfd = tfd;
325	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
326}
327
328static void
329ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
330{
331	uint8_t fis[20];
332	uint8_t error;
333
334	error = (tfd >> 8) & 0xff;
335	memset(fis, 0, sizeof(fis));
336	fis[0] = FIS_TYPE_REGD2H;
337	fis[1] = (1 << 6);
338	fis[2] = tfd & 0xff;
339	fis[3] = error;
340	fis[4] = cfis[4];
341	fis[5] = cfis[5];
342	fis[6] = cfis[6];
343	fis[7] = cfis[7];
344	fis[8] = cfis[8];
345	fis[9] = cfis[9];
346	fis[10] = cfis[10];
347	fis[11] = cfis[11];
348	fis[12] = cfis[12];
349	fis[13] = cfis[13];
350	if (fis[2] & ATA_S_ERROR) {
351		p->is |= AHCI_P_IX_TFE;
352		p->err_cfis[0] = 0x80;
353		p->err_cfis[2] = tfd & 0xff;
354		p->err_cfis[3] = error;
355		memcpy(&p->err_cfis[4], cfis + 4, 16);
356	} else
357		p->ci &= ~(1 << slot);
358	p->tfd = tfd;
359	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
360}
361
362static void
363ahci_write_reset_fis_d2h(struct ahci_port *p)
364{
365	uint8_t fis[20];
366
367	memset(fis, 0, sizeof(fis));
368	fis[0] = FIS_TYPE_REGD2H;
369	fis[3] = 1;
370	fis[4] = 1;
371	if (p->atapi) {
372		fis[5] = 0x14;
373		fis[6] = 0xeb;
374	}
375	fis[12] = 1;
376	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
377}
378
379static void
380ahci_check_stopped(struct ahci_port *p)
381{
382	/*
383	 * If we are no longer processing the command list and nothing
384	 * is in-flight, clear the running bit, the current command
385	 * slot, the command issue and active bits.
386	 */
387	if (!(p->cmd & AHCI_P_CMD_ST)) {
388		if (p->pending == 0) {
389			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
390			p->ci = 0;
391			p->sact = 0;
392		}
393	}
394}
395
396static void
397ahci_port_stop(struct ahci_port *p)
398{
399	struct ahci_ioreq *aior;
400	uint8_t *cfis;
401	int slot;
402	int ncq;
403	int error;
404
405	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
406
407	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
408		/*
409		 * Try to cancel the outstanding blockif request.
410		 */
411		error = blockif_cancel(p->bctx, &aior->io_req);
412		if (error != 0)
413			continue;
414
415		slot = aior->slot;
416		cfis = aior->cfis;
417		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
418		    cfis[2] == ATA_READ_FPDMA_QUEUED)
419			ncq = 1;
420
421		if (ncq)
422			p->sact &= ~(1 << slot);
423		else
424			p->ci &= ~(1 << slot);
425
426		/*
427		 * This command is now done.
428		 */
429		p->pending &= ~(1 << slot);
430
431		/*
432		 * Delete the blockif request from the busy list
433		 */
434		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
435
436		/*
437		 * Move the blockif request back to the free list
438		 */
439		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
440	}
441
442	ahci_check_stopped(p);
443}
444
445static void
446ahci_port_reset(struct ahci_port *pr)
447{
448	pr->serr = 0;
449	pr->sact = 0;
450	pr->xfermode = ATA_UDMA6;
451	pr->mult_sectors = 128;
452
453	if (!pr->bctx) {
454		pr->ssts = ATA_SS_DET_NO_DEVICE;
455		pr->sig = 0xFFFFFFFF;
456		pr->tfd = 0x7F;
457		return;
458	}
459	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
460	if (pr->sctl & ATA_SC_SPD_MASK)
461		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
462	else
463		pr->ssts |= ATA_SS_SPD_GEN3;
464	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
465	if (!pr->atapi) {
466		pr->sig = PxSIG_ATA;
467		pr->tfd |= ATA_S_READY;
468	} else
469		pr->sig = PxSIG_ATAPI;
470	ahci_write_reset_fis_d2h(pr);
471}
472
473static void
474ahci_reset(struct pci_ahci_softc *sc)
475{
476	int i;
477
478	sc->ghc = AHCI_GHC_AE;
479	sc->is = 0;
480
481	if (sc->lintr) {
482		pci_lintr_deassert(sc->asc_pi);
483		sc->lintr = 0;
484	}
485
486	for (i = 0; i < sc->ports; i++) {
487		sc->port[i].ie = 0;
488		sc->port[i].is = 0;
489		sc->port[i].sctl = 0;
490		ahci_port_reset(&sc->port[i]);
491	}
492}
493
/*
 * Store "src" into an ATA identify string field of "len" bytes.
 *
 * The two bytes of every 16-bit word are swapped (index i is written at
 * i ^ 1) and the field is padded with spaces once the source string is
 * exhausted.  No NUL terminator is written.  Callers pass an even len;
 * with an odd len the last byte written would be dest[len].
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
506
/*
 * Store "src" into a fixed-width field of "len" bytes, in order, padding
 * with spaces once the source string is exhausted.  No NUL terminator is
 * written and a longer source is truncated.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
519
520static void
521ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
522    int seek)
523{
524	struct ahci_ioreq *aior;
525	struct blockif_req *breq;
526	struct pci_ahci_softc *sc;
527	struct ahci_prdt_entry *prdt;
528	struct ahci_cmd_hdr *hdr;
529	uint64_t lba;
530	uint32_t len;
531	int i, err, iovcnt, ncq, readop;
532
533	sc = p->pr_sc;
534	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
535	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
536	ncq = 0;
537	readop = 1;
538
539	prdt += seek;
540	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
541	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
542	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
543	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
544		readop = 0;
545
546	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
547	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
548		lba = ((uint64_t)cfis[10] << 40) |
549			((uint64_t)cfis[9] << 32) |
550			((uint64_t)cfis[8] << 24) |
551			((uint64_t)cfis[6] << 16) |
552			((uint64_t)cfis[5] << 8) |
553			cfis[4];
554		len = cfis[11] << 8 | cfis[3];
555		if (!len)
556			len = 65536;
557		ncq = 1;
558	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
559	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
560	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
561		lba = ((uint64_t)cfis[10] << 40) |
562			((uint64_t)cfis[9] << 32) |
563			((uint64_t)cfis[8] << 24) |
564			((uint64_t)cfis[6] << 16) |
565			((uint64_t)cfis[5] << 8) |
566			cfis[4];
567		len = cfis[13] << 8 | cfis[12];
568		if (!len)
569			len = 65536;
570	} else {
571		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
572			(cfis[5] << 8) | cfis[4];
573		len = cfis[12];
574		if (!len)
575			len = 256;
576	}
577	lba *= blockif_sectsz(p->bctx);
578	len *= blockif_sectsz(p->bctx);
579
580	/*
581	 * Pull request off free list
582	 */
583	aior = STAILQ_FIRST(&p->iofhd);
584	assert(aior != NULL);
585	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
586	aior->cfis = cfis;
587	aior->slot = slot;
588	aior->len = len;
589	aior->done = done;
590	breq = &aior->io_req;
591	breq->br_offset = lba + done;
592	iovcnt = hdr->prdtl - seek;
593	if (iovcnt > BLOCKIF_IOV_MAX) {
594		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
595		iovcnt = BLOCKIF_IOV_MAX;
596	} else
597		aior->prdtl = 0;
598	breq->br_iovcnt = iovcnt;
599
600	/*
601	 * Mark this command in-flight.
602	 */
603	p->pending |= 1 << slot;
604
605	/*
606	 * Stuff request onto busy list
607	 */
608	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
609
610	/*
611	 * Build up the iovec based on the prdt
612	 */
613	for (i = 0; i < iovcnt; i++) {
614		uint32_t dbcsz;
615
616		dbcsz = (prdt->dbc & DBCMASK) + 1;
617		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
618		    prdt->dba, dbcsz);
619		breq->br_iov[i].iov_len = dbcsz;
620		aior->done += dbcsz;
621		prdt++;
622	}
623	if (readop)
624		err = blockif_read(p->bctx, breq);
625	else
626		err = blockif_write(p->bctx, breq);
627	assert(err == 0);
628
629	if (ncq)
630		p->ci &= ~(1 << slot);
631}
632
633static void
634ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
635{
636	struct ahci_ioreq *aior;
637	struct blockif_req *breq;
638	int err;
639
640	/*
641	 * Pull request off free list
642	 */
643	aior = STAILQ_FIRST(&p->iofhd);
644	assert(aior != NULL);
645	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
646	aior->cfis = cfis;
647	aior->slot = slot;
648	aior->len = 0;
649	aior->done = 0;
650	aior->prdtl = 0;
651	breq = &aior->io_req;
652
653	/*
654	 * Mark this command in-flight.
655	 */
656	p->pending |= 1 << slot;
657
658	/*
659	 * Stuff request onto busy list
660	 */
661	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
662
663	err = blockif_flush(p->bctx, breq);
664	assert(err == 0);
665}
666
667static inline void
668read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
669		void *buf, int size)
670{
671	struct ahci_cmd_hdr *hdr;
672	struct ahci_prdt_entry *prdt;
673	void *to;
674	int i, len;
675
676	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
677	len = size;
678	to = buf;
679	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
680	for (i = 0; i < hdr->prdtl && len; i++) {
681		uint8_t *ptr;
682		uint32_t dbcsz;
683		int sublen;
684
685		dbcsz = (prdt->dbc & DBCMASK) + 1;
686		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
687		sublen = len < dbcsz ? len : dbcsz;
688		memcpy(to, ptr, sublen);
689		len -= sublen;
690		to += sublen;
691		prdt++;
692	}
693}
694
695static void
696ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
697{
698	struct ahci_ioreq *aior;
699	struct blockif_req *breq;
700	uint8_t *entry;
701	uint64_t elba;
702	uint32_t len, elen;
703	int err;
704	uint8_t buf[512];
705
706	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
707		len = (uint16_t)cfis[13] << 8 | cfis[12];
708		len *= 512;
709	} else { /* ATA_SEND_FPDMA_QUEUED */
710		len = (uint16_t)cfis[11] << 8 | cfis[3];
711		len *= 512;
712	}
713	read_prdt(p, slot, cfis, buf, sizeof(buf));
714
715next:
716	entry = &buf[done];
717	elba = ((uint64_t)entry[5] << 40) |
718		((uint64_t)entry[4] << 32) |
719		((uint64_t)entry[3] << 24) |
720		((uint64_t)entry[2] << 16) |
721		((uint64_t)entry[1] << 8) |
722		entry[0];
723	elen = (uint16_t)entry[7] << 8 | entry[6];
724	done += 8;
725	if (elen == 0) {
726		if (done >= len) {
727			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
728			p->pending &= ~(1 << slot);
729			ahci_check_stopped(p);
730			return;
731		}
732		goto next;
733	}
734
735	/*
736	 * Pull request off free list
737	 */
738	aior = STAILQ_FIRST(&p->iofhd);
739	assert(aior != NULL);
740	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
741	aior->cfis = cfis;
742	aior->slot = slot;
743	aior->len = len;
744	aior->done = done;
745	aior->prdtl = 0;
746
747	breq = &aior->io_req;
748	breq->br_offset = elba * blockif_sectsz(p->bctx);
749	breq->br_iovcnt = 1;
750	breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
751
752	/*
753	 * Mark this command in-flight.
754	 */
755	p->pending |= 1 << slot;
756
757	/*
758	 * Stuff request onto busy list
759	 */
760	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
761
762	err = blockif_delete(p->bctx, breq);
763	assert(err == 0);
764}
765
766static inline void
767write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
768		void *buf, int size)
769{
770	struct ahci_cmd_hdr *hdr;
771	struct ahci_prdt_entry *prdt;
772	void *from;
773	int i, len;
774
775	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
776	len = size;
777	from = buf;
778	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
779	for (i = 0; i < hdr->prdtl && len; i++) {
780		uint8_t *ptr;
781		uint32_t dbcsz;
782		int sublen;
783
784		dbcsz = (prdt->dbc & DBCMASK) + 1;
785		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
786		sublen = len < dbcsz ? len : dbcsz;
787		memcpy(ptr, from, sublen);
788		len -= sublen;
789		from += sublen;
790		prdt++;
791	}
792	hdr->prdbc = size - len;
793}
794
795static void
796ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
797{
798	struct ahci_cmd_hdr *hdr;
799	uint8_t buf[512];
800
801	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
802	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
803	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
804		ahci_write_fis_d2h(p, slot, cfis,
805		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
806		return;
807	}
808
809	memset(buf, 0, sizeof(buf));
810	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
811
812	if (cfis[2] == ATA_READ_LOG_EXT)
813		ahci_write_fis_piosetup(p);
814	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
815	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
816}
817
818static void
819handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
820{
821	struct ahci_cmd_hdr *hdr;
822
823	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
824	if (p->atapi || hdr->prdtl == 0) {
825		ahci_write_fis_d2h(p, slot, cfis,
826		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
827	} else {
828		uint16_t buf[256];
829		uint64_t sectors;
830		int sectsz, psectsz, psectoff, candelete, ro;
831		uint16_t cyl;
832		uint8_t sech, heads;
833
834		ro = blockif_is_ro(p->bctx);
835		candelete = blockif_candelete(p->bctx);
836		sectsz = blockif_sectsz(p->bctx);
837		sectors = blockif_size(p->bctx) / sectsz;
838		blockif_chs(p->bctx, &cyl, &heads, &sech);
839		blockif_psectsz(p->bctx, &psectsz, &psectoff);
840		memset(buf, 0, sizeof(buf));
841		buf[0] = 0x0040;
842		buf[1] = cyl;
843		buf[3] = heads;
844		buf[6] = sech;
845		/* TODO emulate different serial? */
846		ata_string((uint8_t *)(buf+10), "123456", 20);
847		ata_string((uint8_t *)(buf+23), "001", 8);
848		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
849		buf[47] = (0x8000 | 128);
850		buf[48] = 0x1;
851		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
852		buf[50] = (1 << 14);
853		buf[53] = (1 << 1 | 1 << 2);
854		if (p->mult_sectors)
855			buf[59] = (0x100 | p->mult_sectors);
856		if (sectors <= 0x0fffffff) {
857			buf[60] = sectors;
858			buf[61] = (sectors >> 16);
859		} else {
860			buf[60] = 0xffff;
861			buf[61] = 0x0fff;
862		}
863		buf[63] = 0x7;
864		if (p->xfermode & ATA_WDMA0)
865			buf[63] |= (1 << ((p->xfermode & 7) + 8));
866		buf[64] = 0x3;
867		buf[65] = 120;
868		buf[66] = 120;
869		buf[67] = 120;
870		buf[68] = 120;
871		buf[69] = 0;
872		buf[75] = 31;
873		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
874			   ATA_SUPPORT_NCQ);
875		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
876			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
877		buf[80] = 0x1f0;
878		buf[81] = 0x28;
879		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
880			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
881		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
882			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
883		buf[84] = (1 << 14);
884		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
885			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
886		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
887			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
888		buf[87] = (1 << 14);
889		buf[88] = 0x7f;
890		if (p->xfermode & ATA_UDMA0)
891			buf[88] |= (1 << ((p->xfermode & 7) + 8));
892		buf[93] = (1 | 1 <<14);
893		buf[100] = sectors;
894		buf[101] = (sectors >> 16);
895		buf[102] = (sectors >> 32);
896		buf[103] = (sectors >> 48);
897		if (candelete && !ro) {
898			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
899			buf[105] = 1;
900			buf[169] = ATA_SUPPORT_DSM_TRIM;
901		}
902		buf[106] = 0x4000;
903		buf[209] = 0x4000;
904		if (psectsz > sectsz) {
905			buf[106] |= 0x2000;
906			buf[106] |= ffsl(psectsz / sectsz) - 1;
907			buf[209] |= (psectoff / sectsz);
908		}
909		if (sectsz > 512) {
910			buf[106] |= 0x1000;
911			buf[117] = sectsz / 2;
912			buf[118] = ((sectsz / 2) >> 16);
913		}
914		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
915		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
916		buf[222] = 0x1020;
917		ahci_write_fis_piosetup(p);
918		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
919		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
920	}
921}
922
923static void
924handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
925{
926	if (!p->atapi) {
927		ahci_write_fis_d2h(p, slot, cfis,
928		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
929	} else {
930		uint16_t buf[256];
931
932		memset(buf, 0, sizeof(buf));
933		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
934		/* TODO emulate different serial? */
935		ata_string((uint8_t *)(buf+10), "123456", 20);
936		ata_string((uint8_t *)(buf+23), "001", 8);
937		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
938		buf[49] = (1 << 9 | 1 << 8);
939		buf[50] = (1 << 14 | 1);
940		buf[53] = (1 << 2 | 1 << 1);
941		buf[62] = 0x3f;
942		buf[63] = 7;
943		buf[64] = 3;
944		buf[65] = 100;
945		buf[66] = 100;
946		buf[67] = 100;
947		buf[68] = 100;
948		buf[76] = (1 << 2 | 1 << 1);
949		buf[78] = (1 << 5);
950		buf[80] = (0x1f << 4);
951		buf[82] = (1 << 4);
952		buf[83] = (1 << 14);
953		buf[84] = (1 << 14);
954		buf[85] = (1 << 4);
955		buf[87] = (1 << 14);
956		buf[88] = (1 << 14 | 0x7f);
957		ahci_write_fis_piosetup(p);
958		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
959		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
960	}
961}
962
963static void
964atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
965{
966	uint8_t buf[36];
967	uint8_t *acmd;
968	int len;
969
970	acmd = cfis + 0x40;
971
972	buf[0] = 0x05;
973	buf[1] = 0x80;
974	buf[2] = 0x00;
975	buf[3] = 0x21;
976	buf[4] = 31;
977	buf[5] = 0;
978	buf[6] = 0;
979	buf[7] = 0;
980	atapi_string(buf + 8, "BHYVE", 8);
981	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
982	atapi_string(buf + 32, "001", 4);
983
984	len = sizeof(buf);
985	if (len > acmd[4])
986		len = acmd[4];
987	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
988	write_prdt(p, slot, cfis, buf, len);
989	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
990}
991
992static void
993atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
994{
995	uint8_t buf[8];
996	uint64_t sectors;
997
998	sectors = blockif_size(p->bctx) / 2048;
999	be32enc(buf, sectors - 1);
1000	be32enc(buf + 4, 2048);
1001	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1002	write_prdt(p, slot, cfis, buf, sizeof(buf));
1003	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1004}
1005
1006static void
1007atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1008{
1009	uint8_t *acmd;
1010	uint8_t format;
1011	int len;
1012
1013	acmd = cfis + 0x40;
1014
1015	len = be16dec(acmd + 7);
1016	format = acmd[9] >> 6;
1017	switch (format) {
1018	case 0:
1019	{
1020		int msf, size;
1021		uint64_t sectors;
1022		uint8_t start_track, buf[20], *bp;
1023
1024		msf = (acmd[1] >> 1) & 1;
1025		start_track = acmd[6];
1026		if (start_track > 1 && start_track != 0xaa) {
1027			uint32_t tfd;
1028			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1029			p->asc = 0x24;
1030			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1031			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1032			ahci_write_fis_d2h(p, slot, cfis, tfd);
1033			return;
1034		}
1035		bp = buf + 2;
1036		*bp++ = 1;
1037		*bp++ = 1;
1038		if (start_track <= 1) {
1039			*bp++ = 0;
1040			*bp++ = 0x14;
1041			*bp++ = 1;
1042			*bp++ = 0;
1043			if (msf) {
1044				*bp++ = 0;
1045				lba_to_msf(bp, 0);
1046				bp += 3;
1047			} else {
1048				*bp++ = 0;
1049				*bp++ = 0;
1050				*bp++ = 0;
1051				*bp++ = 0;
1052			}
1053		}
1054		*bp++ = 0;
1055		*bp++ = 0x14;
1056		*bp++ = 0xaa;
1057		*bp++ = 0;
1058		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1059		sectors >>= 2;
1060		if (msf) {
1061			*bp++ = 0;
1062			lba_to_msf(bp, sectors);
1063			bp += 3;
1064		} else {
1065			be32enc(bp, sectors);
1066			bp += 4;
1067		}
1068		size = bp - buf;
1069		be16enc(buf, size - 2);
1070		if (len > size)
1071			len = size;
1072		write_prdt(p, slot, cfis, buf, len);
1073		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1074		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1075		break;
1076	}
1077	case 1:
1078	{
1079		uint8_t buf[12];
1080
1081		memset(buf, 0, sizeof(buf));
1082		buf[1] = 0xa;
1083		buf[2] = 0x1;
1084		buf[3] = 0x1;
1085		if (len > sizeof(buf))
1086			len = sizeof(buf);
1087		write_prdt(p, slot, cfis, buf, len);
1088		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1089		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1090		break;
1091	}
1092	case 2:
1093	{
1094		int msf, size;
1095		uint64_t sectors;
1096		uint8_t start_track, *bp, buf[50];
1097
1098		msf = (acmd[1] >> 1) & 1;
1099		start_track = acmd[6];
1100		bp = buf + 2;
1101		*bp++ = 1;
1102		*bp++ = 1;
1103
1104		*bp++ = 1;
1105		*bp++ = 0x14;
1106		*bp++ = 0;
1107		*bp++ = 0xa0;
1108		*bp++ = 0;
1109		*bp++ = 0;
1110		*bp++ = 0;
1111		*bp++ = 0;
1112		*bp++ = 1;
1113		*bp++ = 0;
1114		*bp++ = 0;
1115
1116		*bp++ = 1;
1117		*bp++ = 0x14;
1118		*bp++ = 0;
1119		*bp++ = 0xa1;
1120		*bp++ = 0;
1121		*bp++ = 0;
1122		*bp++ = 0;
1123		*bp++ = 0;
1124		*bp++ = 1;
1125		*bp++ = 0;
1126		*bp++ = 0;
1127
1128		*bp++ = 1;
1129		*bp++ = 0x14;
1130		*bp++ = 0;
1131		*bp++ = 0xa2;
1132		*bp++ = 0;
1133		*bp++ = 0;
1134		*bp++ = 0;
1135		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1136		sectors >>= 2;
1137		if (msf) {
1138			*bp++ = 0;
1139			lba_to_msf(bp, sectors);
1140			bp += 3;
1141		} else {
1142			be32enc(bp, sectors);
1143			bp += 4;
1144		}
1145
1146		*bp++ = 1;
1147		*bp++ = 0x14;
1148		*bp++ = 0;
1149		*bp++ = 1;
1150		*bp++ = 0;
1151		*bp++ = 0;
1152		*bp++ = 0;
1153		if (msf) {
1154			*bp++ = 0;
1155			lba_to_msf(bp, 0);
1156			bp += 3;
1157		} else {
1158			*bp++ = 0;
1159			*bp++ = 0;
1160			*bp++ = 0;
1161			*bp++ = 0;
1162		}
1163
1164		size = bp - buf;
1165		be16enc(buf, size - 2);
1166		if (len > size)
1167			len = size;
1168		write_prdt(p, slot, cfis, buf, len);
1169		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1170		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1171		break;
1172	}
1173	default:
1174	{
1175		uint32_t tfd;
1176
1177		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1178		p->asc = 0x24;
1179		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1180		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1181		ahci_write_fis_d2h(p, slot, cfis, tfd);
1182		break;
1183	}
1184	}
1185}
1186
1187static void
1188atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
1189		uint32_t done, int seek)
1190{
1191	struct ahci_ioreq *aior;
1192	struct ahci_cmd_hdr *hdr;
1193	struct ahci_prdt_entry *prdt;
1194	struct blockif_req *breq;
1195	struct pci_ahci_softc *sc;
1196	uint8_t *acmd;
1197	uint64_t lba;
1198	uint32_t len;
1199	int i, err, iovcnt;
1200
1201	sc = p->pr_sc;
1202	acmd = cfis + 0x40;
1203	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1204	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1205
1206	prdt += seek;
1207	lba = be32dec(acmd + 2);
1208	if (acmd[0] == READ_10)
1209		len = be16dec(acmd + 7);
1210	else
1211		len = be32dec(acmd + 6);
1212	if (len == 0) {
1213		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1214		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1215	}
1216	lba *= 2048;
1217	len *= 2048;
1218
1219	/*
1220	 * Pull request off free list
1221	 */
1222	aior = STAILQ_FIRST(&p->iofhd);
1223	assert(aior != NULL);
1224	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1225	aior->cfis = cfis;
1226	aior->slot = slot;
1227	aior->len = len;
1228	aior->done = done;
1229	breq = &aior->io_req;
1230	breq->br_offset = lba + done;
1231	iovcnt = hdr->prdtl - seek;
1232	if (iovcnt > BLOCKIF_IOV_MAX) {
1233		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
1234		iovcnt = BLOCKIF_IOV_MAX;
1235	} else
1236		aior->prdtl = 0;
1237	breq->br_iovcnt = iovcnt;
1238
1239	/*
1240	 * Mark this command in-flight.
1241	 */
1242	p->pending |= 1 << slot;
1243
1244	/*
1245	 * Stuff request onto busy list
1246	 */
1247	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1248
1249	/*
1250	 * Build up the iovec based on the prdt
1251	 */
1252	for (i = 0; i < iovcnt; i++) {
1253		uint32_t dbcsz;
1254
1255		dbcsz = (prdt->dbc & DBCMASK) + 1;
1256		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
1257		    prdt->dba, dbcsz);
1258		breq->br_iov[i].iov_len = dbcsz;
1259		aior->done += dbcsz;
1260		prdt++;
1261	}
1262	err = blockif_read(p->bctx, breq);
1263	assert(err == 0);
1264}
1265
1266static void
1267atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1268{
1269	uint8_t buf[64];
1270	uint8_t *acmd;
1271	int len;
1272
1273	acmd = cfis + 0x40;
1274	len = acmd[4];
1275	if (len > sizeof(buf))
1276		len = sizeof(buf);
1277	memset(buf, 0, len);
1278	buf[0] = 0x70 | (1 << 7);
1279	buf[2] = p->sense_key;
1280	buf[7] = 10;
1281	buf[12] = p->asc;
1282	write_prdt(p, slot, cfis, buf, len);
1283	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1284	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1285}
1286
1287static void
1288atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1289{
1290	uint8_t *acmd = cfis + 0x40;
1291	uint32_t tfd;
1292
1293	switch (acmd[4] & 3) {
1294	case 0:
1295	case 1:
1296	case 3:
1297		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1298		tfd = ATA_S_READY | ATA_S_DSC;
1299		break;
1300	case 2:
1301		/* TODO eject media */
1302		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1303		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1304		p->asc = 0x53;
1305		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1306		break;
1307	}
1308	ahci_write_fis_d2h(p, slot, cfis, tfd);
1309}
1310
1311static void
1312atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1313{
1314	uint8_t *acmd;
1315	uint32_t tfd;
1316	uint8_t pc, code;
1317	int len;
1318
1319	acmd = cfis + 0x40;
1320	len = be16dec(acmd + 7);
1321	pc = acmd[2] >> 6;
1322	code = acmd[2] & 0x3f;
1323
1324	switch (pc) {
1325	case 0:
1326		switch (code) {
1327		case MODEPAGE_RW_ERROR_RECOVERY:
1328		{
1329			uint8_t buf[16];
1330
1331			if (len > sizeof(buf))
1332				len = sizeof(buf);
1333
1334			memset(buf, 0, sizeof(buf));
1335			be16enc(buf, 16 - 2);
1336			buf[2] = 0x70;
1337			buf[8] = 0x01;
1338			buf[9] = 16 - 10;
1339			buf[11] = 0x05;
1340			write_prdt(p, slot, cfis, buf, len);
1341			tfd = ATA_S_READY | ATA_S_DSC;
1342			break;
1343		}
1344		case MODEPAGE_CD_CAPABILITIES:
1345		{
1346			uint8_t buf[30];
1347
1348			if (len > sizeof(buf))
1349				len = sizeof(buf);
1350
1351			memset(buf, 0, sizeof(buf));
1352			be16enc(buf, 30 - 2);
1353			buf[2] = 0x70;
1354			buf[8] = 0x2A;
1355			buf[9] = 30 - 10;
1356			buf[10] = 0x08;
1357			buf[12] = 0x71;
1358			be16enc(&buf[18], 2);
1359			be16enc(&buf[20], 512);
1360			write_prdt(p, slot, cfis, buf, len);
1361			tfd = ATA_S_READY | ATA_S_DSC;
1362			break;
1363		}
1364		default:
1365			goto error;
1366			break;
1367		}
1368		break;
1369	case 3:
1370		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1371		p->asc = 0x39;
1372		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1373		break;
1374error:
1375	case 1:
1376	case 2:
1377		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1378		p->asc = 0x24;
1379		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1380		break;
1381	}
1382	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1383	ahci_write_fis_d2h(p, slot, cfis, tfd);
1384}
1385
1386static void
1387atapi_get_event_status_notification(struct ahci_port *p, int slot,
1388    uint8_t *cfis)
1389{
1390	uint8_t *acmd;
1391	uint32_t tfd;
1392
1393	acmd = cfis + 0x40;
1394
1395	/* we don't support asynchronous operation */
1396	if (!(acmd[1] & 1)) {
1397		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1398		p->asc = 0x24;
1399		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1400	} else {
1401		uint8_t buf[8];
1402		int len;
1403
1404		len = be16dec(acmd + 7);
1405		if (len > sizeof(buf))
1406			len = sizeof(buf);
1407
1408		memset(buf, 0, sizeof(buf));
1409		be16enc(buf, 8 - 2);
1410		buf[2] = 0x04;
1411		buf[3] = 0x10;
1412		buf[5] = 0x02;
1413		write_prdt(p, slot, cfis, buf, len);
1414		tfd = ATA_S_READY | ATA_S_DSC;
1415	}
1416	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1417	ahci_write_fis_d2h(p, slot, cfis, tfd);
1418}
1419
1420static void
1421handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1422{
1423	uint8_t *acmd;
1424
1425	acmd = cfis + 0x40;
1426
1427#ifdef AHCI_DEBUG
1428	{
1429		int i;
1430		DPRINTF("ACMD:");
1431		for (i = 0; i < 16; i++)
1432			DPRINTF("%02x ", acmd[i]);
1433		DPRINTF("\n");
1434	}
1435#endif
1436
1437	switch (acmd[0]) {
1438	case TEST_UNIT_READY:
1439		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1440		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1441		break;
1442	case INQUIRY:
1443		atapi_inquiry(p, slot, cfis);
1444		break;
1445	case READ_CAPACITY:
1446		atapi_read_capacity(p, slot, cfis);
1447		break;
1448	case PREVENT_ALLOW:
1449		/* TODO */
1450		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1451		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1452		break;
1453	case READ_TOC:
1454		atapi_read_toc(p, slot, cfis);
1455		break;
1456	case READ_10:
1457	case READ_12:
1458		atapi_read(p, slot, cfis, 0, 0);
1459		break;
1460	case REQUEST_SENSE:
1461		atapi_request_sense(p, slot, cfis);
1462		break;
1463	case START_STOP_UNIT:
1464		atapi_start_stop_unit(p, slot, cfis);
1465		break;
1466	case MODE_SENSE_10:
1467		atapi_mode_sense(p, slot, cfis);
1468		break;
1469	case GET_EVENT_STATUS_NOTIFICATION:
1470		atapi_get_event_status_notification(p, slot, cfis);
1471		break;
1472	default:
1473		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1474		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1475		p->asc = 0x20;
1476		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1477				ATA_S_READY | ATA_S_ERROR);
1478		break;
1479	}
1480}
1481
1482static void
1483ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1484{
1485
1486	switch (cfis[2]) {
1487	case ATA_ATA_IDENTIFY:
1488		handle_identify(p, slot, cfis);
1489		break;
1490	case ATA_SETFEATURES:
1491	{
1492		switch (cfis[3]) {
1493		case ATA_SF_ENAB_SATA_SF:
1494			switch (cfis[12]) {
1495			case ATA_SATA_SF_AN:
1496				p->tfd = ATA_S_DSC | ATA_S_READY;
1497				break;
1498			default:
1499				p->tfd = ATA_S_ERROR | ATA_S_READY;
1500				p->tfd |= (ATA_ERROR_ABORT << 8);
1501				break;
1502			}
1503			break;
1504		case ATA_SF_ENAB_WCACHE:
1505		case ATA_SF_DIS_WCACHE:
1506		case ATA_SF_ENAB_RCACHE:
1507		case ATA_SF_DIS_RCACHE:
1508			p->tfd = ATA_S_DSC | ATA_S_READY;
1509			break;
1510		case ATA_SF_SETXFER:
1511		{
1512			switch (cfis[12] & 0xf8) {
1513			case ATA_PIO:
1514			case ATA_PIO0:
1515				break;
1516			case ATA_WDMA0:
1517			case ATA_UDMA0:
1518				p->xfermode = (cfis[12] & 0x7);
1519				break;
1520			}
1521			p->tfd = ATA_S_DSC | ATA_S_READY;
1522			break;
1523		}
1524		default:
1525			p->tfd = ATA_S_ERROR | ATA_S_READY;
1526			p->tfd |= (ATA_ERROR_ABORT << 8);
1527			break;
1528		}
1529		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1530		break;
1531	}
1532	case ATA_SET_MULTI:
1533		if (cfis[12] != 0 &&
1534			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1535			p->tfd = ATA_S_ERROR | ATA_S_READY;
1536			p->tfd |= (ATA_ERROR_ABORT << 8);
1537		} else {
1538			p->mult_sectors = cfis[12];
1539			p->tfd = ATA_S_DSC | ATA_S_READY;
1540		}
1541		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1542		break;
1543	case ATA_READ:
1544	case ATA_WRITE:
1545	case ATA_READ48:
1546	case ATA_WRITE48:
1547	case ATA_READ_MUL:
1548	case ATA_WRITE_MUL:
1549	case ATA_READ_MUL48:
1550	case ATA_WRITE_MUL48:
1551	case ATA_READ_DMA:
1552	case ATA_WRITE_DMA:
1553	case ATA_READ_DMA48:
1554	case ATA_WRITE_DMA48:
1555	case ATA_READ_FPDMA_QUEUED:
1556	case ATA_WRITE_FPDMA_QUEUED:
1557		ahci_handle_dma(p, slot, cfis, 0, 0);
1558		break;
1559	case ATA_FLUSHCACHE:
1560	case ATA_FLUSHCACHE48:
1561		ahci_handle_flush(p, slot, cfis);
1562		break;
1563	case ATA_DATA_SET_MANAGEMENT:
1564		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1565		    cfis[13] == 0 && cfis[12] == 1) {
1566			ahci_handle_dsm_trim(p, slot, cfis, 0);
1567			break;
1568		}
1569		ahci_write_fis_d2h(p, slot, cfis,
1570		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1571		break;
1572	case ATA_SEND_FPDMA_QUEUED:
1573		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1574		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1575		    cfis[11] == 0 && cfis[13] == 1) {
1576			ahci_handle_dsm_trim(p, slot, cfis, 0);
1577			break;
1578		}
1579		ahci_write_fis_d2h(p, slot, cfis,
1580		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1581		break;
1582	case ATA_READ_LOG_EXT:
1583	case ATA_READ_LOG_DMA_EXT:
1584		ahci_handle_read_log(p, slot, cfis);
1585		break;
1586	case ATA_STANDBY_CMD:
1587		break;
1588	case ATA_NOP:
1589	case ATA_STANDBY_IMMEDIATE:
1590	case ATA_IDLE_IMMEDIATE:
1591	case ATA_SLEEP:
1592		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1593		break;
1594	case ATA_ATAPI_IDENTIFY:
1595		handle_atapi_identify(p, slot, cfis);
1596		break;
1597	case ATA_PACKET_CMD:
1598		if (!p->atapi) {
1599			ahci_write_fis_d2h(p, slot, cfis,
1600			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1601		} else
1602			handle_packet_cmd(p, slot, cfis);
1603		break;
1604	default:
1605		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1606		ahci_write_fis_d2h(p, slot, cfis,
1607		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1608		break;
1609	}
1610}
1611
1612static void
1613ahci_handle_slot(struct ahci_port *p, int slot)
1614{
1615	struct ahci_cmd_hdr *hdr;
1616	struct ahci_prdt_entry *prdt;
1617	struct pci_ahci_softc *sc;
1618	uint8_t *cfis;
1619	int cfl;
1620
1621	sc = p->pr_sc;
1622	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1623	cfl = (hdr->flags & 0x1f) * 4;
1624	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1625			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1626	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1627
1628#ifdef AHCI_DEBUG
1629	DPRINTF("\ncfis:");
1630	for (i = 0; i < cfl; i++) {
1631		if (i % 10 == 0)
1632			DPRINTF("\n");
1633		DPRINTF("%02x ", cfis[i]);
1634	}
1635	DPRINTF("\n");
1636
1637	for (i = 0; i < hdr->prdtl; i++) {
1638		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1639		prdt++;
1640	}
1641#endif
1642
1643	if (cfis[0] != FIS_TYPE_REGH2D) {
1644		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1645		return;
1646	}
1647
1648	if (cfis[1] & 0x80) {
1649		ahci_handle_cmd(p, slot, cfis);
1650	} else {
1651		if (cfis[15] & (1 << 2))
1652			p->reset = 1;
1653		else if (p->reset) {
1654			p->reset = 0;
1655			ahci_port_reset(p);
1656		}
1657		p->ci &= ~(1 << slot);
1658	}
1659}
1660
1661static void
1662ahci_handle_port(struct ahci_port *p)
1663{
1664	int i;
1665
1666	if (!(p->cmd & AHCI_P_CMD_ST))
1667		return;
1668
1669	/*
1670	 * Search for any new commands to issue ignoring those that
1671	 * are already in-flight.
1672	 */
1673	for (i = 0; (i < 32) && p->ci; i++) {
1674		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1675			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1676			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1677			ahci_handle_slot(p, i);
1678		}
1679	}
1680}
1681
1682/*
1683 * blockif callback routine - this runs in the context of the blockif
1684 * i/o thread, so the mutex needs to be acquired.
1685 */
1686static void
1687ata_ioreq_cb(struct blockif_req *br, int err)
1688{
1689	struct ahci_cmd_hdr *hdr;
1690	struct ahci_ioreq *aior;
1691	struct ahci_port *p;
1692	struct pci_ahci_softc *sc;
1693	uint32_t tfd;
1694	uint8_t *cfis;
1695	int pending, slot, ncq, dsm;
1696
1697	DPRINTF("%s %d\n", __func__, err);
1698
1699	ncq = dsm = 0;
1700	aior = br->br_param;
1701	p = aior->io_pr;
1702	cfis = aior->cfis;
1703	slot = aior->slot;
1704	pending = aior->prdtl;
1705	sc = p->pr_sc;
1706	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1707
1708	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1709	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1710	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1711		ncq = 1;
1712	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1713	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1714	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1715		dsm = 1;
1716
1717	pthread_mutex_lock(&sc->mtx);
1718
1719	/*
1720	 * Delete the blockif request from the busy list
1721	 */
1722	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1723
1724	/*
1725	 * Move the blockif request back to the free list
1726	 */
1727	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1728
1729	if (!err)
1730		hdr->prdbc = aior->done;
1731
1732	if (dsm) {
1733		if (aior->done != aior->len && !err) {
1734			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1735			goto out;
1736		}
1737	} else {
1738		if (pending && !err) {
1739			ahci_handle_dma(p, slot, cfis, aior->done,
1740			    hdr->prdtl - pending);
1741			goto out;
1742		}
1743	}
1744
1745	if (!err && aior->done == aior->len) {
1746		tfd = ATA_S_READY | ATA_S_DSC;
1747	} else {
1748		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1749	}
1750
1751	if (ncq)
1752		ahci_write_fis_sdb(p, slot, cfis, tfd);
1753	else
1754		ahci_write_fis_d2h(p, slot, cfis, tfd);
1755
1756	/*
1757	 * This command is now complete.
1758	 */
1759	p->pending &= ~(1 << slot);
1760
1761	ahci_check_stopped(p);
1762out:
1763	pthread_mutex_unlock(&sc->mtx);
1764	DPRINTF("%s exit\n", __func__);
1765}
1766
1767static void
1768atapi_ioreq_cb(struct blockif_req *br, int err)
1769{
1770	struct ahci_cmd_hdr *hdr;
1771	struct ahci_ioreq *aior;
1772	struct ahci_port *p;
1773	struct pci_ahci_softc *sc;
1774	uint8_t *cfis;
1775	uint32_t tfd;
1776	int pending, slot;
1777
1778	DPRINTF("%s %d\n", __func__, err);
1779
1780	aior = br->br_param;
1781	p = aior->io_pr;
1782	cfis = aior->cfis;
1783	slot = aior->slot;
1784	pending = aior->prdtl;
1785	sc = p->pr_sc;
1786	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1787
1788	pthread_mutex_lock(&sc->mtx);
1789
1790	/*
1791	 * Delete the blockif request from the busy list
1792	 */
1793	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1794
1795	/*
1796	 * Move the blockif request back to the free list
1797	 */
1798	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1799
1800	if (!err)
1801		hdr->prdbc = aior->done;
1802
1803	if (pending && !err) {
1804		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1805		goto out;
1806	}
1807
1808	if (!err && aior->done == aior->len) {
1809		tfd = ATA_S_READY | ATA_S_DSC;
1810	} else {
1811		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1812		p->asc = 0x21;
1813		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1814	}
1815
1816	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1817	ahci_write_fis_d2h(p, slot, cfis, tfd);
1818
1819	/*
1820	 * This command is now complete.
1821	 */
1822	p->pending &= ~(1 << slot);
1823
1824	ahci_check_stopped(p);
1825out:
1826	pthread_mutex_unlock(&sc->mtx);
1827	DPRINTF("%s exit\n", __func__);
1828}
1829
1830static void
1831pci_ahci_ioreq_init(struct ahci_port *pr)
1832{
1833	struct ahci_ioreq *vr;
1834	int i;
1835
1836	pr->ioqsz = blockif_queuesz(pr->bctx);
1837	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1838	STAILQ_INIT(&pr->iofhd);
1839
1840	/*
1841	 * Add all i/o request entries to the free queue
1842	 */
1843	for (i = 0; i < pr->ioqsz; i++) {
1844		vr = &pr->ioreq[i];
1845		vr->io_pr = pr;
1846		if (!pr->atapi)
1847			vr->io_req.br_callback = ata_ioreq_cb;
1848		else
1849			vr->io_req.br_callback = atapi_ioreq_cb;
1850		vr->io_req.br_param = vr;
1851		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1852	}
1853
1854	TAILQ_INIT(&pr->iobhd);
1855}
1856
1857static void
1858pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1859{
1860	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1861	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1862	struct ahci_port *p = &sc->port[port];
1863
1864	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1865		port, offset, value);
1866
1867	switch (offset) {
1868	case AHCI_P_CLB:
1869		p->clb = value;
1870		break;
1871	case AHCI_P_CLBU:
1872		p->clbu = value;
1873		break;
1874	case AHCI_P_FB:
1875		p->fb = value;
1876		break;
1877	case AHCI_P_FBU:
1878		p->fbu = value;
1879		break;
1880	case AHCI_P_IS:
1881		p->is &= ~value;
1882		break;
1883	case AHCI_P_IE:
1884		p->ie = value & 0xFDC000FF;
1885		ahci_generate_intr(sc);
1886		break;
1887	case AHCI_P_CMD:
1888	{
1889		p->cmd = value;
1890
1891		if (!(value & AHCI_P_CMD_ST)) {
1892			ahci_port_stop(p);
1893		} else {
1894			uint64_t clb;
1895
1896			p->cmd |= AHCI_P_CMD_CR;
1897			clb = (uint64_t)p->clbu << 32 | p->clb;
1898			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1899					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1900		}
1901
1902		if (value & AHCI_P_CMD_FRE) {
1903			uint64_t fb;
1904
1905			p->cmd |= AHCI_P_CMD_FR;
1906			fb = (uint64_t)p->fbu << 32 | p->fb;
1907			/* we don't support FBSCP, so rfis size is 256Bytes */
1908			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1909		} else {
1910			p->cmd &= ~AHCI_P_CMD_FR;
1911		}
1912
1913		if (value & AHCI_P_CMD_CLO) {
1914			p->tfd = 0;
1915			p->cmd &= ~AHCI_P_CMD_CLO;
1916		}
1917
1918		ahci_handle_port(p);
1919		break;
1920	}
1921	case AHCI_P_TFD:
1922	case AHCI_P_SIG:
1923	case AHCI_P_SSTS:
1924		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1925		break;
1926	case AHCI_P_SCTL:
1927		p->sctl = value;
1928		if (!(p->cmd & AHCI_P_CMD_ST)) {
1929			if (value & ATA_SC_DET_RESET)
1930				ahci_port_reset(p);
1931		}
1932		break;
1933	case AHCI_P_SERR:
1934		p->serr &= ~value;
1935		break;
1936	case AHCI_P_SACT:
1937		p->sact |= value;
1938		break;
1939	case AHCI_P_CI:
1940		p->ci |= value;
1941		ahci_handle_port(p);
1942		break;
1943	case AHCI_P_SNTF:
1944	case AHCI_P_FBS:
1945	default:
1946		break;
1947	}
1948}
1949
1950static void
1951pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1952{
1953	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1954		offset, value);
1955
1956	switch (offset) {
1957	case AHCI_CAP:
1958	case AHCI_PI:
1959	case AHCI_VS:
1960	case AHCI_CAP2:
1961		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1962		break;
1963	case AHCI_GHC:
1964		if (value & AHCI_GHC_HR)
1965			ahci_reset(sc);
1966		else if (value & AHCI_GHC_IE) {
1967			sc->ghc |= AHCI_GHC_IE;
1968			ahci_generate_intr(sc);
1969		}
1970		break;
1971	case AHCI_IS:
1972		sc->is &= ~value;
1973		ahci_generate_intr(sc);
1974		break;
1975	default:
1976		break;
1977	}
1978}
1979
1980static void
1981pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1982		int baridx, uint64_t offset, int size, uint64_t value)
1983{
1984	struct pci_ahci_softc *sc = pi->pi_arg;
1985
1986	assert(baridx == 5);
1987	assert(size == 4);
1988
1989	pthread_mutex_lock(&sc->mtx);
1990
1991	if (offset < AHCI_OFFSET)
1992		pci_ahci_host_write(sc, offset, value);
1993	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1994		pci_ahci_port_write(sc, offset, value);
1995	else
1996		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1997
1998	pthread_mutex_unlock(&sc->mtx);
1999}
2000
2001static uint64_t
2002pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2003{
2004	uint32_t value;
2005
2006	switch (offset) {
2007	case AHCI_CAP:
2008	case AHCI_GHC:
2009	case AHCI_IS:
2010	case AHCI_PI:
2011	case AHCI_VS:
2012	case AHCI_CCCC:
2013	case AHCI_CCCP:
2014	case AHCI_EM_LOC:
2015	case AHCI_EM_CTL:
2016	case AHCI_CAP2:
2017	{
2018		uint32_t *p = &sc->cap;
2019		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2020		value = *p;
2021		break;
2022	}
2023	default:
2024		value = 0;
2025		break;
2026	}
2027	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2028		offset, value);
2029
2030	return (value);
2031}
2032
2033static uint64_t
2034pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2035{
2036	uint32_t value;
2037	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2038	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2039
2040	switch (offset) {
2041	case AHCI_P_CLB:
2042	case AHCI_P_CLBU:
2043	case AHCI_P_FB:
2044	case AHCI_P_FBU:
2045	case AHCI_P_IS:
2046	case AHCI_P_IE:
2047	case AHCI_P_CMD:
2048	case AHCI_P_TFD:
2049	case AHCI_P_SIG:
2050	case AHCI_P_SSTS:
2051	case AHCI_P_SCTL:
2052	case AHCI_P_SERR:
2053	case AHCI_P_SACT:
2054	case AHCI_P_CI:
2055	case AHCI_P_SNTF:
2056	case AHCI_P_FBS:
2057	{
2058		uint32_t *p= &sc->port[port].clb;
2059		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2060		value = *p;
2061		break;
2062	}
2063	default:
2064		value = 0;
2065		break;
2066	}
2067
2068	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2069		port, offset, value);
2070
2071	return value;
2072}
2073
2074static uint64_t
2075pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2076    uint64_t offset, int size)
2077{
2078	struct pci_ahci_softc *sc = pi->pi_arg;
2079	uint32_t value;
2080
2081	assert(baridx == 5);
2082	assert(size == 4);
2083
2084	pthread_mutex_lock(&sc->mtx);
2085
2086	if (offset < AHCI_OFFSET)
2087		value = pci_ahci_host_read(sc, offset);
2088	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2089		value = pci_ahci_port_read(sc, offset);
2090	else {
2091		value = 0;
2092		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2093	}
2094
2095	pthread_mutex_unlock(&sc->mtx);
2096
2097	return (value);
2098}
2099
2100static int
2101pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2102{
2103	char bident[sizeof("XX:X:X")];
2104	struct blockif_ctxt *bctxt;
2105	struct pci_ahci_softc *sc;
2106	int ret, slots;
2107
2108	ret = 0;
2109
2110	if (opts == NULL) {
2111		fprintf(stderr, "pci_ahci: backing device required\n");
2112		return (1);
2113	}
2114
2115#ifdef AHCI_DEBUG
2116	dbg = fopen("/tmp/log", "w+");
2117#endif
2118
2119	sc = calloc(1, sizeof(struct pci_ahci_softc));
2120	pi->pi_arg = sc;
2121	sc->asc_pi = pi;
2122	sc->ports = MAX_PORTS;
2123
2124	/*
2125	 * Only use port 0 for a backing device. All other ports will be
2126	 * marked as unused
2127	 */
2128	sc->port[0].atapi = atapi;
2129
2130	/*
2131	 * Attempt to open the backing image. Use the PCI
2132	 * slot/func for the identifier string.
2133	 */
2134	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2135	bctxt = blockif_open(opts, bident);
2136	if (bctxt == NULL) {
2137		ret = 1;
2138		goto open_fail;
2139	}
2140	sc->port[0].bctx = bctxt;
2141	sc->port[0].pr_sc = sc;
2142
2143	/*
2144	 * Allocate blockif request structures and add them
2145	 * to the free list
2146	 */
2147	pci_ahci_ioreq_init(&sc->port[0]);
2148
2149	pthread_mutex_init(&sc->mtx, NULL);
2150
2151	/* Intel ICH8 AHCI */
2152	slots = sc->port[0].ioqsz;
2153	if (slots > 32)
2154		slots = 32;
2155	--slots;
2156	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2157	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2158	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2159	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2160	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2161
2162	/* Only port 0 implemented */
2163	sc->pi = 1;
2164	sc->vs = 0x10300;
2165	sc->cap2 = AHCI_CAP2_APST;
2166	ahci_reset(sc);
2167
2168	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2169	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2170	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2171	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2172	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2173	pci_emul_add_msicap(pi, 1);
2174	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2175	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2176
2177	pci_lintr_request(pi);
2178
2179open_fail:
2180	if (ret) {
2181		blockif_close(sc->port[0].bctx);
2182		free(sc);
2183	}
2184
2185	return (ret);
2186}
2187
/*
 * Init hook for the "ahci-hd" emulation: common init with atapi off.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2194
/*
 * Init hook for the "ahci-cd" emulation: common init with atapi on.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2201
2202/*
2203 * Use separate emulation names to distinguish drive and atapi devices
2204 */
2205struct pci_devemu pci_de_ahci_hd = {
2206	.pe_emu =	"ahci-hd",
2207	.pe_init =	pci_ahci_hd_init,
2208	.pe_barwrite =	pci_ahci_write,
2209	.pe_barread =	pci_ahci_read
2210};
2211PCI_EMUL_SET(pci_de_ahci_hd);
2212
2213struct pci_devemu pci_de_ahci_cd = {
2214	.pe_emu =	"ahci-cd",
2215	.pe_init =	pci_ahci_atapi_init,
2216	.pe_barwrite =	pci_ahci_write,
2217	.pe_barread =	pci_ahci_read
2218};
2219PCI_EMUL_SET(pci_de_ahci_cd);
2220