pci_ahci.c revision 282845
13812Ssos/*-
23812Ssos * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
33812Ssos * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 282845 2015-05-13 10:30:53Z mav $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 282845 2015-05-13 10:30:53Z mav $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <pthread_np.h>
52#include <inttypes.h>
53#include <md5.h>
54
55#include "bhyverun.h"
56#include "pci_emul.h"
57#include "ahci.h"
58#include "block_if.h"
59
60#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
61
62#define	PxSIG_ATA	0x00000101 /* ATA drive */
63#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
64
/*
 * SATA Frame Information Structure (FIS) type codes.  The code is stored
 * in byte 0 of each FIS built by the ahci_write_fis_*() helpers.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
75
76/*
77 * SCSI opcodes
78 */
79#define	TEST_UNIT_READY		0x00
80#define	REQUEST_SENSE		0x03
81#define	INQUIRY			0x12
82#define	START_STOP_UNIT		0x1B
83#define	PREVENT_ALLOW		0x1E
84#define	READ_CAPACITY		0x25
85#define	READ_10			0x28
86#define	POSITION_TO_ELEMENT	0x2B
87#define	READ_TOC		0x43
88#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
89#define	MODE_SENSE_10		0x5A
90#define	REPORT_LUNS		0xA0
91#define	READ_12			0xA8
92#define	READ_CD			0xBE
93
94/*
95 * SCSI mode page codes
96 */
97#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
98#define	MODEPAGE_CD_CAPABILITIES	0x2A
99
100/*
101 * ATA commands
102 */
103#define	ATA_SF_ENAB_SATA_SF		0x10
104#define		ATA_SATA_SF_AN		0x05
105#define	ATA_SF_DIS_SATA_SF		0x90
106
107/*
108 * Debug printf
109 */
110#ifdef AHCI_DEBUG
111static FILE *dbg;
112#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
113#else
114#define DPRINTF(format, arg...)
115#endif
116#define WPRINTF(format, arg...) printf(format, ##arg)
117
/*
 * Bookkeeping for one command that has been handed to the blockif layer.
 * An entry lives either on the owning port's free list (iofhd) or on its
 * busy list (iobhd).
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to blockif_*() */
	struct ahci_port *io_pr;	/* presumably the owning port - TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;			/* command FIS of the command */
	uint32_t len;			/* total length of the command, bytes */
	uint32_t done;			/* bytes already issued to blockif */
	int slot;			/* command slot number */
	int more;			/* non-zero: another chunk must follow */
};
129
/*
 * Per-port emulation state.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* command list, indexed by slot */
	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
	char ident[20 + 1];		/* string reported by the IDENTIFY handlers */
	int atapi;			/* non-zero for an ATAPI device */
	int reset;			/* NOTE(review): not used in the visible code */
	int mult_sectors;		/* multi-sector count (128 after reset) */
	uint8_t xfermode;		/* transfer mode (ATA_UDMA6 after reset) */
	uint8_t err_cfis[20];		/* error record returned by READ LOG */
	uint8_t sense_key;		/* ATAPI sense key of the last error */
	uint8_t asc;			/* ATAPI additional sense code */
	uint32_t pending;		/* bitmask of slots with I/O in flight */

	/*
	 * Port register state; names presumably follow the AHCI Px*
	 * registers - TODO confirm against the register access code.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably the request pool - TODO confirm */
	int ioqsz;			/* presumably the pool size - TODO confirm */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy (in-flight) requests */
};
171
/*
 * One command-list entry, located at cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for the command */
	uint32_t prdbc;		/* byte count transferred; set by write_prdt() */
	uint64_t ctba;		/* presumably the command table address - TODO confirm */
	uint32_t reserved[4];
};
179
/*
 * One physical region descriptor; the table starts 0x80 bytes after the
 * command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
186
/*
 * Per-controller emulation state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance */
	pthread_mutex_t	mtx;		/* must be held when stopping a port */
	int ports;			/* number of entries of port[] in use */
	/*
	 * HBA-level register state; names presumably follow the AHCI
	 * generic host control registers - TODO confirm.
	 */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;			/* one bit per port with a pending cause */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the pin interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
205#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
206
/*
 * Convert a logical block address into minute/second/frame form.
 *
 * A fixed offset of 150 frames is added first; there are 75 frames per
 * second.  buf[0] receives minutes, buf[1] seconds and buf[2] frames.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
214
215/*
216 * generate HBA intr depending on whether or not ports within
217 * the controller have an interrupt pending.
218 */
219static void
220ahci_generate_intr(struct pci_ahci_softc *sc)
221{
222	struct pci_devinst *pi;
223	int i;
224
225	pi = sc->asc_pi;
226
227	for (i = 0; i < sc->ports; i++) {
228		struct ahci_port *pr;
229		pr = &sc->port[i];
230		if (pr->is & pr->ie)
231			sc->is |= (1 << i);
232	}
233
234	DPRINTF("%s %x\n", __func__, sc->is);
235
236	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
237		if (pci_msi_enabled(pi)) {
238			/*
239			 * Generate an MSI interrupt on every edge
240			 */
241			pci_generate_msi(pi, 0);
242		} else if (!sc->lintr) {
243			/*
244			 * Only generate a pin-based interrupt if one wasn't
245			 * in progress
246			 */
247			sc->lintr = 1;
248			pci_lintr_assert(pi);
249		}
250	} else if (sc->lintr) {
251		/*
252		 * No interrupts: deassert pin-based signal if it had
253		 * been asserted
254		 */
255		pci_lintr_deassert(pi);
256		sc->lintr = 0;
257	}
258}
259
260static void
261ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
262{
263	int offset, len, irq;
264
265	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
266		return;
267
268	switch (ft) {
269	case FIS_TYPE_REGD2H:
270		offset = 0x40;
271		len = 20;
272		irq = AHCI_P_IX_DHR;
273		break;
274	case FIS_TYPE_SETDEVBITS:
275		offset = 0x58;
276		len = 8;
277		irq = AHCI_P_IX_SDB;
278		break;
279	case FIS_TYPE_PIOSETUP:
280		offset = 0x20;
281		len = 20;
282		irq = 0;
283		break;
284	default:
285		WPRINTF("unsupported fis type %d\n", ft);
286		return;
287	}
288	memcpy(p->rfis + offset, fis, len);
289	if (irq) {
290		p->is |= irq;
291		ahci_generate_intr(p->pr_sc);
292	}
293}
294
295static void
296ahci_write_fis_piosetup(struct ahci_port *p)
297{
298	uint8_t fis[20];
299
300	memset(fis, 0, sizeof(fis));
301	fis[0] = FIS_TYPE_PIOSETUP;
302	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
303}
304
305static void
306ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
307{
308	uint8_t fis[8];
309	uint8_t error;
310
311	error = (tfd >> 8) & 0xff;
312	memset(fis, 0, sizeof(fis));
313	fis[0] = FIS_TYPE_SETDEVBITS;
314	fis[1] = (1 << 6);
315	fis[2] = tfd & 0x77;
316	fis[3] = error;
317	if (fis[2] & ATA_S_ERROR) {
318		p->is |= AHCI_P_IX_TFE;
319		p->err_cfis[0] = slot;
320		p->err_cfis[2] = tfd & 0x77;
321		p->err_cfis[3] = error;
322		memcpy(&p->err_cfis[4], cfis + 4, 16);
323	} else {
324		*(uint32_t *)(fis + 4) = (1 << slot);
325		p->sact &= ~(1 << slot);
326	}
327	p->tfd = tfd;
328	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
329}
330
331static void
332ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
333{
334	uint8_t fis[20];
335	uint8_t error;
336
337	error = (tfd >> 8) & 0xff;
338	memset(fis, 0, sizeof(fis));
339	fis[0] = FIS_TYPE_REGD2H;
340	fis[1] = (1 << 6);
341	fis[2] = tfd & 0xff;
342	fis[3] = error;
343	fis[4] = cfis[4];
344	fis[5] = cfis[5];
345	fis[6] = cfis[6];
346	fis[7] = cfis[7];
347	fis[8] = cfis[8];
348	fis[9] = cfis[9];
349	fis[10] = cfis[10];
350	fis[11] = cfis[11];
351	fis[12] = cfis[12];
352	fis[13] = cfis[13];
353	if (fis[2] & ATA_S_ERROR) {
354		p->is |= AHCI_P_IX_TFE;
355		p->err_cfis[0] = 0x80;
356		p->err_cfis[2] = tfd & 0xff;
357		p->err_cfis[3] = error;
358		memcpy(&p->err_cfis[4], cfis + 4, 16);
359	} else
360		p->ci &= ~(1 << slot);
361	p->tfd = tfd;
362	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363}
364
365static void
366ahci_write_reset_fis_d2h(struct ahci_port *p)
367{
368	uint8_t fis[20];
369
370	memset(fis, 0, sizeof(fis));
371	fis[0] = FIS_TYPE_REGD2H;
372	fis[3] = 1;
373	fis[4] = 1;
374	if (p->atapi) {
375		fis[5] = 0x14;
376		fis[6] = 0xeb;
377	}
378	fis[12] = 1;
379	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
380}
381
382static void
383ahci_check_stopped(struct ahci_port *p)
384{
385	/*
386	 * If we are no longer processing the command list and nothing
387	 * is in-flight, clear the running bit, the current command
388	 * slot, the command issue and active bits.
389	 */
390	if (!(p->cmd & AHCI_P_CMD_ST)) {
391		if (p->pending == 0) {
392			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
393			p->ci = 0;
394			p->sact = 0;
395		}
396	}
397}
398
399static void
400ahci_port_stop(struct ahci_port *p)
401{
402	struct ahci_ioreq *aior;
403	uint8_t *cfis;
404	int slot;
405	int ncq;
406	int error;
407
408	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
409
410	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
411		/*
412		 * Try to cancel the outstanding blockif request.
413		 */
414		error = blockif_cancel(p->bctx, &aior->io_req);
415		if (error != 0)
416			continue;
417
418		slot = aior->slot;
419		cfis = aior->cfis;
420		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
421		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
422		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
423			ncq = 1;
424
425		if (ncq)
426			p->sact &= ~(1 << slot);
427		else
428			p->ci &= ~(1 << slot);
429
430		/*
431		 * This command is now done.
432		 */
433		p->pending &= ~(1 << slot);
434
435		/*
436		 * Delete the blockif request from the busy list
437		 */
438		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
439
440		/*
441		 * Move the blockif request back to the free list
442		 */
443		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
444	}
445
446	ahci_check_stopped(p);
447}
448
449static void
450ahci_port_reset(struct ahci_port *pr)
451{
452	pr->serr = 0;
453	pr->sact = 0;
454	pr->xfermode = ATA_UDMA6;
455	pr->mult_sectors = 128;
456
457	if (!pr->bctx) {
458		pr->ssts = ATA_SS_DET_NO_DEVICE;
459		pr->sig = 0xFFFFFFFF;
460		pr->tfd = 0x7F;
461		return;
462	}
463	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
464	if (pr->sctl & ATA_SC_SPD_MASK)
465		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
466	else
467		pr->ssts |= ATA_SS_SPD_GEN3;
468	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
469	if (!pr->atapi) {
470		pr->sig = PxSIG_ATA;
471		pr->tfd |= ATA_S_READY;
472	} else
473		pr->sig = PxSIG_ATAPI;
474	ahci_write_reset_fis_d2h(pr);
475}
476
477static void
478ahci_reset(struct pci_ahci_softc *sc)
479{
480	int i;
481
482	sc->ghc = AHCI_GHC_AE;
483	sc->is = 0;
484
485	if (sc->lintr) {
486		pci_lintr_deassert(sc->asc_pi);
487		sc->lintr = 0;
488	}
489
490	for (i = 0; i < sc->ports; i++) {
491		sc->port[i].ie = 0;
492		sc->port[i].is = 0;
493		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
494		if (sc->port[i].bctx)
495			sc->port[i].cmd |= AHCI_P_CMD_CPS;
496		sc->port[i].sctl = 0;
497		ahci_port_reset(&sc->port[i]);
498	}
499}
500
/*
 * Store 'src' into an ATA identify string field of 'len' bytes.
 *
 * Each pair of bytes is swapped (byte i lands at index i ^ 1) and the
 * field is padded with spaces once 'src' is exhausted.  'len' is expected
 * to be even.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	uint8_t ch;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
513
/*
 * Store 'src' into a fixed-width field of 'len' bytes, padding with
 * spaces once 'src' is exhausted.  No terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	int pos = 0;

	/* Copy characters while both the source and the field last. */
	while (pos < len && *src != '\0')
		dest[pos++] = *src++;

	/* Pad the remainder. */
	while (pos < len)
		dest[pos++] = ' ';
}
526
/*
 * Build up the iovec based on the PRDT, 'done' and 'len'.
 *
 * Maps the guest buffers described by the PRDT into breq->br_iov[],
 * starting aior->done bytes into the transfer and covering at most
 * aior->len - aior->done bytes.  On return br_iovcnt and br_resid
 * describe this chunk, aior->done has been advanced past it, and
 * aior->more is set when a further chunk is still required.
 */
static void
ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
    struct ahci_prdt_entry *prdt, uint16_t prdtl)
{
	struct blockif_req *breq = &aior->io_req;
	int i, j, skip, todo, left, extra;
	uint32_t dbcsz;

	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
	skip = aior->done;		/* bytes of PRDT still to step over */
	left = aior->len - aior->done;	/* bytes of the command still to map */
	todo = 0;			/* bytes mapped into this chunk */
	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
	    i++, prdt++) {
		/* The low 22 bits of dbc hold the buffer size minus one. */
		dbcsz = (prdt->dbc & DBCMASK) + 1;
		/* Skip already done part of the PRDT */
		if (dbcsz <= skip) {
			skip -= dbcsz;
			continue;
		}
		/* Start part-way into this entry if 'skip' ends inside it. */
		dbcsz -= skip;
		if (dbcsz > left)
			dbcsz = left;
		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
		    prdt->dba + skip, dbcsz);
		breq->br_iov[j].iov_len = dbcsz;
		todo += dbcsz;
		left -= dbcsz;
		skip = 0;
		j++;
	}

	/* If we got limited by IOV length, round I/O down to sector size. */
	if (j == BLOCKIF_IOV_MAX) {
		extra = todo % blockif_sectsz(p->bctx);
		todo -= extra;
		assert(todo > 0);
		/* Trim 'extra' bytes off the tail, dropping whole entries. */
		while (extra > 0) {
			if (breq->br_iov[j - 1].iov_len > extra) {
				breq->br_iov[j - 1].iov_len -= extra;
				break;
			}
			extra -= breq->br_iov[j - 1].iov_len;
			j--;
		}
	}

	breq->br_iovcnt = j;
	breq->br_resid = todo;
	aior->done += todo;
	/* More to do only if bytes remain and PRDT entries are left. */
	aior->more = (aior->done < aior->len && i < prdtl);
}
582
583static void
584ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
585{
586	struct ahci_ioreq *aior;
587	struct blockif_req *breq;
588	struct ahci_prdt_entry *prdt;
589	struct ahci_cmd_hdr *hdr;
590	uint64_t lba;
591	uint32_t len;
592	int err, ncq, readop;
593
594	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
595	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
596	ncq = 0;
597	readop = 1;
598
599	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
600	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
601	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
602	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
603		readop = 0;
604
605	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
606	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
607		lba = ((uint64_t)cfis[10] << 40) |
608			((uint64_t)cfis[9] << 32) |
609			((uint64_t)cfis[8] << 24) |
610			((uint64_t)cfis[6] << 16) |
611			((uint64_t)cfis[5] << 8) |
612			cfis[4];
613		len = cfis[11] << 8 | cfis[3];
614		if (!len)
615			len = 65536;
616		ncq = 1;
617	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
618	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
619	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
620		lba = ((uint64_t)cfis[10] << 40) |
621			((uint64_t)cfis[9] << 32) |
622			((uint64_t)cfis[8] << 24) |
623			((uint64_t)cfis[6] << 16) |
624			((uint64_t)cfis[5] << 8) |
625			cfis[4];
626		len = cfis[13] << 8 | cfis[12];
627		if (!len)
628			len = 65536;
629	} else {
630		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
631			(cfis[5] << 8) | cfis[4];
632		len = cfis[12];
633		if (!len)
634			len = 256;
635	}
636	lba *= blockif_sectsz(p->bctx);
637	len *= blockif_sectsz(p->bctx);
638
639	/* Pull request off free list */
640	aior = STAILQ_FIRST(&p->iofhd);
641	assert(aior != NULL);
642	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
643
644	aior->cfis = cfis;
645	aior->slot = slot;
646	aior->len = len;
647	aior->done = done;
648	breq = &aior->io_req;
649	breq->br_offset = lba + done;
650	ahci_build_iov(p, aior, prdt, hdr->prdtl);
651
652	/* Mark this command in-flight. */
653	p->pending |= 1 << slot;
654
655	/* Stuff request onto busy list. */
656	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
657
658	if (readop)
659		err = blockif_read(p->bctx, breq);
660	else
661		err = blockif_write(p->bctx, breq);
662	assert(err == 0);
663
664	if (ncq)
665		p->ci &= ~(1 << slot);
666}
667
668static void
669ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
670{
671	struct ahci_ioreq *aior;
672	struct blockif_req *breq;
673	int err;
674
675	/*
676	 * Pull request off free list
677	 */
678	aior = STAILQ_FIRST(&p->iofhd);
679	assert(aior != NULL);
680	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
681	aior->cfis = cfis;
682	aior->slot = slot;
683	aior->len = 0;
684	aior->done = 0;
685	aior->more = 0;
686	breq = &aior->io_req;
687
688	/*
689	 * Mark this command in-flight.
690	 */
691	p->pending |= 1 << slot;
692
693	/*
694	 * Stuff request onto busy list
695	 */
696	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
697
698	err = blockif_flush(p->bctx, breq);
699	assert(err == 0);
700}
701
702static inline void
703read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
704		void *buf, int size)
705{
706	struct ahci_cmd_hdr *hdr;
707	struct ahci_prdt_entry *prdt;
708	void *to;
709	int i, len;
710
711	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
712	len = size;
713	to = buf;
714	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
715	for (i = 0; i < hdr->prdtl && len; i++) {
716		uint8_t *ptr;
717		uint32_t dbcsz;
718		int sublen;
719
720		dbcsz = (prdt->dbc & DBCMASK) + 1;
721		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
722		sublen = len < dbcsz ? len : dbcsz;
723		memcpy(to, ptr, sublen);
724		len -= sublen;
725		to += sublen;
726		prdt++;
727	}
728}
729
730static void
731ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
732{
733	struct ahci_ioreq *aior;
734	struct blockif_req *breq;
735	uint8_t *entry;
736	uint64_t elba;
737	uint32_t len, elen;
738	int err;
739	uint8_t buf[512];
740
741	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
742		len = (uint16_t)cfis[13] << 8 | cfis[12];
743		len *= 512;
744	} else { /* ATA_SEND_FPDMA_QUEUED */
745		len = (uint16_t)cfis[11] << 8 | cfis[3];
746		len *= 512;
747	}
748	read_prdt(p, slot, cfis, buf, sizeof(buf));
749
750next:
751	entry = &buf[done];
752	elba = ((uint64_t)entry[5] << 40) |
753		((uint64_t)entry[4] << 32) |
754		((uint64_t)entry[3] << 24) |
755		((uint64_t)entry[2] << 16) |
756		((uint64_t)entry[1] << 8) |
757		entry[0];
758	elen = (uint16_t)entry[7] << 8 | entry[6];
759	done += 8;
760	if (elen == 0) {
761		if (done >= len) {
762			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
763			p->pending &= ~(1 << slot);
764			ahci_check_stopped(p);
765			return;
766		}
767		goto next;
768	}
769
770	/*
771	 * Pull request off free list
772	 */
773	aior = STAILQ_FIRST(&p->iofhd);
774	assert(aior != NULL);
775	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
776	aior->cfis = cfis;
777	aior->slot = slot;
778	aior->len = len;
779	aior->done = done;
780	aior->more = (len != done);
781
782	breq = &aior->io_req;
783	breq->br_offset = elba * blockif_sectsz(p->bctx);
784	breq->br_resid = elen * blockif_sectsz(p->bctx);
785
786	/*
787	 * Mark this command in-flight.
788	 */
789	p->pending |= 1 << slot;
790
791	/*
792	 * Stuff request onto busy list
793	 */
794	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
795
796	err = blockif_delete(p->bctx, breq);
797	assert(err == 0);
798}
799
800static inline void
801write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
802		void *buf, int size)
803{
804	struct ahci_cmd_hdr *hdr;
805	struct ahci_prdt_entry *prdt;
806	void *from;
807	int i, len;
808
809	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
810	len = size;
811	from = buf;
812	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
813	for (i = 0; i < hdr->prdtl && len; i++) {
814		uint8_t *ptr;
815		uint32_t dbcsz;
816		int sublen;
817
818		dbcsz = (prdt->dbc & DBCMASK) + 1;
819		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
820		sublen = len < dbcsz ? len : dbcsz;
821		memcpy(ptr, from, sublen);
822		len -= sublen;
823		from += sublen;
824		prdt++;
825	}
826	hdr->prdbc = size - len;
827}
828
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of
 * all 'size' bytes is zero.  The previous value of the last byte is
 * ignored.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	uint8_t total = 0;
	int idx = 0;

	/* Sum every byte except the final one (modulo 256). */
	while (idx < size - 1)
		total += buf[idx++];

	buf[size - 1] = (uint8_t)(0x100 - total);
}
839
840static void
841ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
842{
843	struct ahci_cmd_hdr *hdr;
844	uint8_t buf[512];
845
846	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
847	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
848	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
849		ahci_write_fis_d2h(p, slot, cfis,
850		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
851		return;
852	}
853
854	memset(buf, 0, sizeof(buf));
855	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
856	ahci_checksum(buf, sizeof(buf));
857
858	if (cfis[2] == ATA_READ_LOG_EXT)
859		ahci_write_fis_piosetup(p);
860	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
861	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
862}
863
864static void
865handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
866{
867	struct ahci_cmd_hdr *hdr;
868
869	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
870	if (p->atapi || hdr->prdtl == 0) {
871		ahci_write_fis_d2h(p, slot, cfis,
872		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
873	} else {
874		uint16_t buf[256];
875		uint64_t sectors;
876		int sectsz, psectsz, psectoff, candelete, ro;
877		uint16_t cyl;
878		uint8_t sech, heads;
879
880		ro = blockif_is_ro(p->bctx);
881		candelete = blockif_candelete(p->bctx);
882		sectsz = blockif_sectsz(p->bctx);
883		sectors = blockif_size(p->bctx) / sectsz;
884		blockif_chs(p->bctx, &cyl, &heads, &sech);
885		blockif_psectsz(p->bctx, &psectsz, &psectoff);
886		memset(buf, 0, sizeof(buf));
887		buf[0] = 0x0040;
888		buf[1] = cyl;
889		buf[3] = heads;
890		buf[6] = sech;
891		ata_string((uint8_t *)(buf+10), p->ident, 20);
892		ata_string((uint8_t *)(buf+23), "001", 8);
893		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
894		buf[47] = (0x8000 | 128);
895		buf[48] = 0x1;
896		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
897		buf[50] = (1 << 14);
898		buf[53] = (1 << 1 | 1 << 2);
899		if (p->mult_sectors)
900			buf[59] = (0x100 | p->mult_sectors);
901		if (sectors <= 0x0fffffff) {
902			buf[60] = sectors;
903			buf[61] = (sectors >> 16);
904		} else {
905			buf[60] = 0xffff;
906			buf[61] = 0x0fff;
907		}
908		buf[63] = 0x7;
909		if (p->xfermode & ATA_WDMA0)
910			buf[63] |= (1 << ((p->xfermode & 7) + 8));
911		buf[64] = 0x3;
912		buf[65] = 120;
913		buf[66] = 120;
914		buf[67] = 120;
915		buf[68] = 120;
916		buf[69] = 0;
917		buf[75] = 31;
918		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
919			   ATA_SUPPORT_NCQ);
920		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
921			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
922		buf[80] = 0x3f0;
923		buf[81] = 0x28;
924		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
925			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
926		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
927			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
928		buf[84] = (1 << 14);
929		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
930			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
931		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
932			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
933		buf[87] = (1 << 14);
934		buf[88] = 0x7f;
935		if (p->xfermode & ATA_UDMA0)
936			buf[88] |= (1 << ((p->xfermode & 7) + 8));
937		buf[100] = sectors;
938		buf[101] = (sectors >> 16);
939		buf[102] = (sectors >> 32);
940		buf[103] = (sectors >> 48);
941		if (candelete && !ro) {
942			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
943			buf[105] = 1;
944			buf[169] = ATA_SUPPORT_DSM_TRIM;
945		}
946		buf[106] = 0x4000;
947		buf[209] = 0x4000;
948		if (psectsz > sectsz) {
949			buf[106] |= 0x2000;
950			buf[106] |= ffsl(psectsz / sectsz) - 1;
951			buf[209] |= (psectoff / sectsz);
952		}
953		if (sectsz > 512) {
954			buf[106] |= 0x1000;
955			buf[117] = sectsz / 2;
956			buf[118] = ((sectsz / 2) >> 16);
957		}
958		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
959		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
960		buf[222] = 0x1020;
961		buf[255] = 0x00a5;
962		ahci_checksum((uint8_t *)buf, sizeof(buf));
963		ahci_write_fis_piosetup(p);
964		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
965		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
966	}
967}
968
969static void
970handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
971{
972	if (!p->atapi) {
973		ahci_write_fis_d2h(p, slot, cfis,
974		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
975	} else {
976		uint16_t buf[256];
977
978		memset(buf, 0, sizeof(buf));
979		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
980		ata_string((uint8_t *)(buf+10), p->ident, 20);
981		ata_string((uint8_t *)(buf+23), "001", 8);
982		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
983		buf[49] = (1 << 9 | 1 << 8);
984		buf[50] = (1 << 14 | 1);
985		buf[53] = (1 << 2 | 1 << 1);
986		buf[62] = 0x3f;
987		buf[63] = 7;
988		if (p->xfermode & ATA_WDMA0)
989			buf[63] |= (1 << ((p->xfermode & 7) + 8));
990		buf[64] = 3;
991		buf[65] = 120;
992		buf[66] = 120;
993		buf[67] = 120;
994		buf[68] = 120;
995		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
996		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
997		buf[78] = (1 << 5);
998		buf[80] = 0x3f0;
999		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1000			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1001		buf[83] = (1 << 14);
1002		buf[84] = (1 << 14);
1003		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1004			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1005		buf[87] = (1 << 14);
1006		buf[88] = 0x7f;
1007		if (p->xfermode & ATA_UDMA0)
1008			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1009		buf[222] = 0x1020;
1010		buf[255] = 0x00a5;
1011		ahci_checksum((uint8_t *)buf, sizeof(buf));
1012		ahci_write_fis_piosetup(p);
1013		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1014		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1015	}
1016}
1017
1018static void
1019atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1020{
1021	uint8_t buf[36];
1022	uint8_t *acmd;
1023	int len;
1024	uint32_t tfd;
1025
1026	acmd = cfis + 0x40;
1027
1028	if (acmd[1] & 1) {		/* VPD */
1029		if (acmd[2] == 0) {	/* Supported VPD pages */
1030			buf[0] = 0x05;
1031			buf[1] = 0;
1032			buf[2] = 0;
1033			buf[3] = 1;
1034			buf[4] = 0;
1035			len = 4 + buf[3];
1036		} else {
1037			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1038			p->asc = 0x24;
1039			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1040			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1041			ahci_write_fis_d2h(p, slot, cfis, tfd);
1042			return;
1043		}
1044	} else {
1045		buf[0] = 0x05;
1046		buf[1] = 0x80;
1047		buf[2] = 0x00;
1048		buf[3] = 0x21;
1049		buf[4] = 31;
1050		buf[5] = 0;
1051		buf[6] = 0;
1052		buf[7] = 0;
1053		atapi_string(buf + 8, "BHYVE", 8);
1054		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1055		atapi_string(buf + 32, "001", 4);
1056		len = sizeof(buf);
1057	}
1058
1059	if (len > acmd[4])
1060		len = acmd[4];
1061	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1062	write_prdt(p, slot, cfis, buf, len);
1063	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1064}
1065
1066static void
1067atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1068{
1069	uint8_t buf[8];
1070	uint64_t sectors;
1071
1072	sectors = blockif_size(p->bctx) / 2048;
1073	be32enc(buf, sectors - 1);
1074	be32enc(buf + 4, 2048);
1075	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1076	write_prdt(p, slot, cfis, buf, sizeof(buf));
1077	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1078}
1079
/*
 * Handle the SCSI READ TOC packet command.
 *
 * The format is taken from the top two bits of packet byte 9.  Formats
 * 0, 1 and 2 build a fixed response describing a single track plus the
 * lead-out; any other format, or a start track other than 0, 1 or 0xaa
 * in format 0, fails with ILLEGAL REQUEST / ASC 0x24.  Responses are
 * truncated to the allocation length in packet bytes 7-8.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	int len;

	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);	/* allocation length */
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, buf[20], *bp;

		msf = (acmd[1] >> 1) & 1;	/* addresses in MSF form? */
		start_track = acmd[6];
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* Bytes 0-1 (data length) are filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		/* Descriptor for track 1, unless only the lead-out was asked for. */
		if (start_track <= 1) {
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* Descriptor for the lead-out (track 0xaa). */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		uint8_t buf[12];

		/* Fixed 12-byte response; everything but bytes 1-3 is zero. */
		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;	/* addresses in MSF form? */
		start_track = acmd[6];		/* read but not used below */
		/* Bytes 0-1 (data length) are filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* 11-byte descriptor, point 0xa0 */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* 11-byte descriptor, point 0xa1 */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Descriptor, point 0xa2, ending with the end-of-medium address */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* Descriptor, point 1, ending with address 0 */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		uint32_t tfd;

		/* Unsupported format */
		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
1260
1261static void
1262atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1263{
1264	uint8_t buf[16];
1265
1266	memset(buf, 0, sizeof(buf));
1267	buf[3] = 8;
1268
1269	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1270	write_prdt(p, slot, cfis, buf, sizeof(buf));
1271	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1272}
1273
1274static void
1275atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1276{
1277	struct ahci_ioreq *aior;
1278	struct ahci_cmd_hdr *hdr;
1279	struct ahci_prdt_entry *prdt;
1280	struct blockif_req *breq;
1281	struct pci_ahci_softc *sc;
1282	uint8_t *acmd;
1283	uint64_t lba;
1284	uint32_t len;
1285	int err;
1286
1287	sc = p->pr_sc;
1288	acmd = cfis + 0x40;
1289	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1290	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1291
1292	lba = be32dec(acmd + 2);
1293	if (acmd[0] == READ_10)
1294		len = be16dec(acmd + 7);
1295	else
1296		len = be32dec(acmd + 6);
1297	if (len == 0) {
1298		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1299		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1300	}
1301	lba *= 2048;
1302	len *= 2048;
1303
1304	/*
1305	 * Pull request off free list
1306	 */
1307	aior = STAILQ_FIRST(&p->iofhd);
1308	assert(aior != NULL);
1309	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1310	aior->cfis = cfis;
1311	aior->slot = slot;
1312	aior->len = len;
1313	aior->done = done;
1314	breq = &aior->io_req;
1315	breq->br_offset = lba + done;
1316	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1317
1318	/* Mark this command in-flight. */
1319	p->pending |= 1 << slot;
1320
1321	/* Stuff request onto busy list. */
1322	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1323
1324	err = blockif_read(p->bctx, breq);
1325	assert(err == 0);
1326}
1327
1328static void
1329atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1330{
1331	uint8_t buf[64];
1332	uint8_t *acmd;
1333	int len;
1334
1335	acmd = cfis + 0x40;
1336	len = acmd[4];
1337	if (len > sizeof(buf))
1338		len = sizeof(buf);
1339	memset(buf, 0, len);
1340	buf[0] = 0x70 | (1 << 7);
1341	buf[2] = p->sense_key;
1342	buf[7] = 10;
1343	buf[12] = p->asc;
1344	write_prdt(p, slot, cfis, buf, len);
1345	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1346	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1347}
1348
1349static void
1350atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1351{
1352	uint8_t *acmd = cfis + 0x40;
1353	uint32_t tfd;
1354
1355	switch (acmd[4] & 3) {
1356	case 0:
1357	case 1:
1358	case 3:
1359		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1360		tfd = ATA_S_READY | ATA_S_DSC;
1361		break;
1362	case 2:
1363		/* TODO eject media */
1364		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1365		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1366		p->asc = 0x53;
1367		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1368		break;
1369	}
1370	ahci_write_fis_d2h(p, slot, cfis, tfd);
1371}
1372
1373static void
1374atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1375{
1376	uint8_t *acmd;
1377	uint32_t tfd;
1378	uint8_t pc, code;
1379	int len;
1380
1381	acmd = cfis + 0x40;
1382	len = be16dec(acmd + 7);
1383	pc = acmd[2] >> 6;
1384	code = acmd[2] & 0x3f;
1385
1386	switch (pc) {
1387	case 0:
1388		switch (code) {
1389		case MODEPAGE_RW_ERROR_RECOVERY:
1390		{
1391			uint8_t buf[16];
1392
1393			if (len > sizeof(buf))
1394				len = sizeof(buf);
1395
1396			memset(buf, 0, sizeof(buf));
1397			be16enc(buf, 16 - 2);
1398			buf[2] = 0x70;
1399			buf[8] = 0x01;
1400			buf[9] = 16 - 10;
1401			buf[11] = 0x05;
1402			write_prdt(p, slot, cfis, buf, len);
1403			tfd = ATA_S_READY | ATA_S_DSC;
1404			break;
1405		}
1406		case MODEPAGE_CD_CAPABILITIES:
1407		{
1408			uint8_t buf[30];
1409
1410			if (len > sizeof(buf))
1411				len = sizeof(buf);
1412
1413			memset(buf, 0, sizeof(buf));
1414			be16enc(buf, 30 - 2);
1415			buf[2] = 0x70;
1416			buf[8] = 0x2A;
1417			buf[9] = 30 - 10;
1418			buf[10] = 0x08;
1419			buf[12] = 0x71;
1420			be16enc(&buf[18], 2);
1421			be16enc(&buf[20], 512);
1422			write_prdt(p, slot, cfis, buf, len);
1423			tfd = ATA_S_READY | ATA_S_DSC;
1424			break;
1425		}
1426		default:
1427			goto error;
1428			break;
1429		}
1430		break;
1431	case 3:
1432		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1433		p->asc = 0x39;
1434		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1435		break;
1436error:
1437	case 1:
1438	case 2:
1439		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1440		p->asc = 0x24;
1441		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1442		break;
1443	}
1444	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1445	ahci_write_fis_d2h(p, slot, cfis, tfd);
1446}
1447
1448static void
1449atapi_get_event_status_notification(struct ahci_port *p, int slot,
1450    uint8_t *cfis)
1451{
1452	uint8_t *acmd;
1453	uint32_t tfd;
1454
1455	acmd = cfis + 0x40;
1456
1457	/* we don't support asynchronous operation */
1458	if (!(acmd[1] & 1)) {
1459		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1460		p->asc = 0x24;
1461		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1462	} else {
1463		uint8_t buf[8];
1464		int len;
1465
1466		len = be16dec(acmd + 7);
1467		if (len > sizeof(buf))
1468			len = sizeof(buf);
1469
1470		memset(buf, 0, sizeof(buf));
1471		be16enc(buf, 8 - 2);
1472		buf[2] = 0x04;
1473		buf[3] = 0x10;
1474		buf[5] = 0x02;
1475		write_prdt(p, slot, cfis, buf, len);
1476		tfd = ATA_S_READY | ATA_S_DSC;
1477	}
1478	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1479	ahci_write_fis_d2h(p, slot, cfis, tfd);
1480}
1481
1482static void
1483handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1484{
1485	uint8_t *acmd;
1486
1487	acmd = cfis + 0x40;
1488
1489#ifdef AHCI_DEBUG
1490	{
1491		int i;
1492		DPRINTF("ACMD:");
1493		for (i = 0; i < 16; i++)
1494			DPRINTF("%02x ", acmd[i]);
1495		DPRINTF("\n");
1496	}
1497#endif
1498
1499	switch (acmd[0]) {
1500	case TEST_UNIT_READY:
1501		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1502		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1503		break;
1504	case INQUIRY:
1505		atapi_inquiry(p, slot, cfis);
1506		break;
1507	case READ_CAPACITY:
1508		atapi_read_capacity(p, slot, cfis);
1509		break;
1510	case PREVENT_ALLOW:
1511		/* TODO */
1512		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1513		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1514		break;
1515	case READ_TOC:
1516		atapi_read_toc(p, slot, cfis);
1517		break;
1518	case REPORT_LUNS:
1519		atapi_report_luns(p, slot, cfis);
1520		break;
1521	case READ_10:
1522	case READ_12:
1523		atapi_read(p, slot, cfis, 0);
1524		break;
1525	case REQUEST_SENSE:
1526		atapi_request_sense(p, slot, cfis);
1527		break;
1528	case START_STOP_UNIT:
1529		atapi_start_stop_unit(p, slot, cfis);
1530		break;
1531	case MODE_SENSE_10:
1532		atapi_mode_sense(p, slot, cfis);
1533		break;
1534	case GET_EVENT_STATUS_NOTIFICATION:
1535		atapi_get_event_status_notification(p, slot, cfis);
1536		break;
1537	default:
1538		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1539		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1540		p->asc = 0x20;
1541		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1542				ATA_S_READY | ATA_S_ERROR);
1543		break;
1544	}
1545}
1546
1547static void
1548ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1549{
1550
1551	switch (cfis[2]) {
1552	case ATA_ATA_IDENTIFY:
1553		handle_identify(p, slot, cfis);
1554		break;
1555	case ATA_SETFEATURES:
1556	{
1557		switch (cfis[3]) {
1558		case ATA_SF_ENAB_SATA_SF:
1559			switch (cfis[12]) {
1560			case ATA_SATA_SF_AN:
1561				p->tfd = ATA_S_DSC | ATA_S_READY;
1562				break;
1563			default:
1564				p->tfd = ATA_S_ERROR | ATA_S_READY;
1565				p->tfd |= (ATA_ERROR_ABORT << 8);
1566				break;
1567			}
1568			break;
1569		case ATA_SF_ENAB_WCACHE:
1570		case ATA_SF_DIS_WCACHE:
1571		case ATA_SF_ENAB_RCACHE:
1572		case ATA_SF_DIS_RCACHE:
1573			p->tfd = ATA_S_DSC | ATA_S_READY;
1574			break;
1575		case ATA_SF_SETXFER:
1576		{
1577			switch (cfis[12] & 0xf8) {
1578			case ATA_PIO:
1579			case ATA_PIO0:
1580				break;
1581			case ATA_WDMA0:
1582			case ATA_UDMA0:
1583				p->xfermode = (cfis[12] & 0x7);
1584				break;
1585			}
1586			p->tfd = ATA_S_DSC | ATA_S_READY;
1587			break;
1588		}
1589		default:
1590			p->tfd = ATA_S_ERROR | ATA_S_READY;
1591			p->tfd |= (ATA_ERROR_ABORT << 8);
1592			break;
1593		}
1594		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1595		break;
1596	}
1597	case ATA_SET_MULTI:
1598		if (cfis[12] != 0 &&
1599			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1600			p->tfd = ATA_S_ERROR | ATA_S_READY;
1601			p->tfd |= (ATA_ERROR_ABORT << 8);
1602		} else {
1603			p->mult_sectors = cfis[12];
1604			p->tfd = ATA_S_DSC | ATA_S_READY;
1605		}
1606		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1607		break;
1608	case ATA_READ:
1609	case ATA_WRITE:
1610	case ATA_READ48:
1611	case ATA_WRITE48:
1612	case ATA_READ_MUL:
1613	case ATA_WRITE_MUL:
1614	case ATA_READ_MUL48:
1615	case ATA_WRITE_MUL48:
1616	case ATA_READ_DMA:
1617	case ATA_WRITE_DMA:
1618	case ATA_READ_DMA48:
1619	case ATA_WRITE_DMA48:
1620	case ATA_READ_FPDMA_QUEUED:
1621	case ATA_WRITE_FPDMA_QUEUED:
1622		ahci_handle_rw(p, slot, cfis, 0);
1623		break;
1624	case ATA_FLUSHCACHE:
1625	case ATA_FLUSHCACHE48:
1626		ahci_handle_flush(p, slot, cfis);
1627		break;
1628	case ATA_DATA_SET_MANAGEMENT:
1629		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1630		    cfis[13] == 0 && cfis[12] == 1) {
1631			ahci_handle_dsm_trim(p, slot, cfis, 0);
1632			break;
1633		}
1634		ahci_write_fis_d2h(p, slot, cfis,
1635		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1636		break;
1637	case ATA_SEND_FPDMA_QUEUED:
1638		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1639		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1640		    cfis[11] == 0 && cfis[13] == 1) {
1641			ahci_handle_dsm_trim(p, slot, cfis, 0);
1642			break;
1643		}
1644		ahci_write_fis_d2h(p, slot, cfis,
1645		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1646		break;
1647	case ATA_READ_LOG_EXT:
1648	case ATA_READ_LOG_DMA_EXT:
1649		ahci_handle_read_log(p, slot, cfis);
1650		break;
1651	case ATA_NOP:
1652		ahci_write_fis_d2h(p, slot, cfis,
1653		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1654		break;
1655	case ATA_STANDBY_CMD:
1656	case ATA_STANDBY_IMMEDIATE:
1657	case ATA_IDLE_CMD:
1658	case ATA_IDLE_IMMEDIATE:
1659	case ATA_SLEEP:
1660		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1661		break;
1662	case ATA_ATAPI_IDENTIFY:
1663		handle_atapi_identify(p, slot, cfis);
1664		break;
1665	case ATA_PACKET_CMD:
1666		if (!p->atapi) {
1667			ahci_write_fis_d2h(p, slot, cfis,
1668			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1669		} else
1670			handle_packet_cmd(p, slot, cfis);
1671		break;
1672	default:
1673		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1674		ahci_write_fis_d2h(p, slot, cfis,
1675		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1676		break;
1677	}
1678}
1679
1680static void
1681ahci_handle_slot(struct ahci_port *p, int slot)
1682{
1683	struct ahci_cmd_hdr *hdr;
1684	struct ahci_prdt_entry *prdt;
1685	struct pci_ahci_softc *sc;
1686	uint8_t *cfis;
1687	int cfl;
1688
1689	sc = p->pr_sc;
1690	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1691	cfl = (hdr->flags & 0x1f) * 4;
1692	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1693			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1694	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1695
1696#ifdef AHCI_DEBUG
1697	DPRINTF("\ncfis:");
1698	for (i = 0; i < cfl; i++) {
1699		if (i % 10 == 0)
1700			DPRINTF("\n");
1701		DPRINTF("%02x ", cfis[i]);
1702	}
1703	DPRINTF("\n");
1704
1705	for (i = 0; i < hdr->prdtl; i++) {
1706		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1707		prdt++;
1708	}
1709#endif
1710
1711	if (cfis[0] != FIS_TYPE_REGH2D) {
1712		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1713		return;
1714	}
1715
1716	if (cfis[1] & 0x80) {
1717		ahci_handle_cmd(p, slot, cfis);
1718	} else {
1719		if (cfis[15] & (1 << 2))
1720			p->reset = 1;
1721		else if (p->reset) {
1722			p->reset = 0;
1723			ahci_port_reset(p);
1724		}
1725		p->ci &= ~(1 << slot);
1726	}
1727}
1728
1729static void
1730ahci_handle_port(struct ahci_port *p)
1731{
1732	int i;
1733
1734	if (!(p->cmd & AHCI_P_CMD_ST))
1735		return;
1736
1737	/*
1738	 * Search for any new commands to issue ignoring those that
1739	 * are already in-flight.
1740	 */
1741	for (i = 0; (i < 32) && p->ci; i++) {
1742		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1743			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1744			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1745			ahci_handle_slot(p, i);
1746		}
1747	}
1748}
1749
1750/*
1751 * blockif callback routine - this runs in the context of the blockif
1752 * i/o thread, so the mutex needs to be acquired.
1753 */
1754static void
1755ata_ioreq_cb(struct blockif_req *br, int err)
1756{
1757	struct ahci_cmd_hdr *hdr;
1758	struct ahci_ioreq *aior;
1759	struct ahci_port *p;
1760	struct pci_ahci_softc *sc;
1761	uint32_t tfd;
1762	uint8_t *cfis;
1763	int slot, ncq, dsm;
1764
1765	DPRINTF("%s %d\n", __func__, err);
1766
1767	ncq = dsm = 0;
1768	aior = br->br_param;
1769	p = aior->io_pr;
1770	cfis = aior->cfis;
1771	slot = aior->slot;
1772	sc = p->pr_sc;
1773	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1774
1775	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1776	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1777	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1778		ncq = 1;
1779	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1780	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1781	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1782		dsm = 1;
1783
1784	pthread_mutex_lock(&sc->mtx);
1785
1786	/*
1787	 * Delete the blockif request from the busy list
1788	 */
1789	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1790
1791	/*
1792	 * Move the blockif request back to the free list
1793	 */
1794	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1795
1796	if (!err)
1797		hdr->prdbc = aior->done;
1798
1799	if (!err && aior->more) {
1800		if (dsm)
1801			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1802		else
1803			ahci_handle_rw(p, slot, cfis, aior->done);
1804		goto out;
1805	}
1806
1807	if (!err)
1808		tfd = ATA_S_READY | ATA_S_DSC;
1809	else
1810		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1811	if (ncq)
1812		ahci_write_fis_sdb(p, slot, cfis, tfd);
1813	else
1814		ahci_write_fis_d2h(p, slot, cfis, tfd);
1815
1816	/*
1817	 * This command is now complete.
1818	 */
1819	p->pending &= ~(1 << slot);
1820
1821	ahci_check_stopped(p);
1822out:
1823	pthread_mutex_unlock(&sc->mtx);
1824	DPRINTF("%s exit\n", __func__);
1825}
1826
1827static void
1828atapi_ioreq_cb(struct blockif_req *br, int err)
1829{
1830	struct ahci_cmd_hdr *hdr;
1831	struct ahci_ioreq *aior;
1832	struct ahci_port *p;
1833	struct pci_ahci_softc *sc;
1834	uint8_t *cfis;
1835	uint32_t tfd;
1836	int slot;
1837
1838	DPRINTF("%s %d\n", __func__, err);
1839
1840	aior = br->br_param;
1841	p = aior->io_pr;
1842	cfis = aior->cfis;
1843	slot = aior->slot;
1844	sc = p->pr_sc;
1845	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1846
1847	pthread_mutex_lock(&sc->mtx);
1848
1849	/*
1850	 * Delete the blockif request from the busy list
1851	 */
1852	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1853
1854	/*
1855	 * Move the blockif request back to the free list
1856	 */
1857	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1858
1859	if (!err)
1860		hdr->prdbc = aior->done;
1861
1862	if (!err && aior->more) {
1863		atapi_read(p, slot, cfis, aior->done);
1864		goto out;
1865	}
1866
1867	if (!err) {
1868		tfd = ATA_S_READY | ATA_S_DSC;
1869	} else {
1870		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1871		p->asc = 0x21;
1872		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1873	}
1874	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1875	ahci_write_fis_d2h(p, slot, cfis, tfd);
1876
1877	/*
1878	 * This command is now complete.
1879	 */
1880	p->pending &= ~(1 << slot);
1881
1882	ahci_check_stopped(p);
1883out:
1884	pthread_mutex_unlock(&sc->mtx);
1885	DPRINTF("%s exit\n", __func__);
1886}
1887
1888static void
1889pci_ahci_ioreq_init(struct ahci_port *pr)
1890{
1891	struct ahci_ioreq *vr;
1892	int i;
1893
1894	pr->ioqsz = blockif_queuesz(pr->bctx);
1895	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1896	STAILQ_INIT(&pr->iofhd);
1897
1898	/*
1899	 * Add all i/o request entries to the free queue
1900	 */
1901	for (i = 0; i < pr->ioqsz; i++) {
1902		vr = &pr->ioreq[i];
1903		vr->io_pr = pr;
1904		if (!pr->atapi)
1905			vr->io_req.br_callback = ata_ioreq_cb;
1906		else
1907			vr->io_req.br_callback = atapi_ioreq_cb;
1908		vr->io_req.br_param = vr;
1909		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1910	}
1911
1912	TAILQ_INIT(&pr->iobhd);
1913}
1914
1915static void
1916pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1917{
1918	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1919	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1920	struct ahci_port *p = &sc->port[port];
1921
1922	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1923		port, offset, value);
1924
1925	switch (offset) {
1926	case AHCI_P_CLB:
1927		p->clb = value;
1928		break;
1929	case AHCI_P_CLBU:
1930		p->clbu = value;
1931		break;
1932	case AHCI_P_FB:
1933		p->fb = value;
1934		break;
1935	case AHCI_P_FBU:
1936		p->fbu = value;
1937		break;
1938	case AHCI_P_IS:
1939		p->is &= ~value;
1940		break;
1941	case AHCI_P_IE:
1942		p->ie = value & 0xFDC000FF;
1943		ahci_generate_intr(sc);
1944		break;
1945	case AHCI_P_CMD:
1946	{
1947		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
1948		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
1949		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
1950		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
1951		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
1952		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
1953		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
1954		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
1955
1956		if (!(value & AHCI_P_CMD_ST)) {
1957			ahci_port_stop(p);
1958		} else {
1959			uint64_t clb;
1960
1961			p->cmd |= AHCI_P_CMD_CR;
1962			clb = (uint64_t)p->clbu << 32 | p->clb;
1963			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1964					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1965		}
1966
1967		if (value & AHCI_P_CMD_FRE) {
1968			uint64_t fb;
1969
1970			p->cmd |= AHCI_P_CMD_FR;
1971			fb = (uint64_t)p->fbu << 32 | p->fb;
1972			/* we don't support FBSCP, so rfis size is 256Bytes */
1973			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1974		} else {
1975			p->cmd &= ~AHCI_P_CMD_FR;
1976		}
1977
1978		if (value & AHCI_P_CMD_CLO) {
1979			p->tfd = 0;
1980			p->cmd &= ~AHCI_P_CMD_CLO;
1981		}
1982
1983		if (value & AHCI_P_CMD_ICC_MASK) {
1984			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
1985		}
1986
1987		ahci_handle_port(p);
1988		break;
1989	}
1990	case AHCI_P_TFD:
1991	case AHCI_P_SIG:
1992	case AHCI_P_SSTS:
1993		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1994		break;
1995	case AHCI_P_SCTL:
1996		p->sctl = value;
1997		if (!(p->cmd & AHCI_P_CMD_ST)) {
1998			if (value & ATA_SC_DET_RESET)
1999				ahci_port_reset(p);
2000		}
2001		break;
2002	case AHCI_P_SERR:
2003		p->serr &= ~value;
2004		break;
2005	case AHCI_P_SACT:
2006		p->sact |= value;
2007		break;
2008	case AHCI_P_CI:
2009		p->ci |= value;
2010		ahci_handle_port(p);
2011		break;
2012	case AHCI_P_SNTF:
2013	case AHCI_P_FBS:
2014	default:
2015		break;
2016	}
2017}
2018
2019static void
2020pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2021{
2022	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2023		offset, value);
2024
2025	switch (offset) {
2026	case AHCI_CAP:
2027	case AHCI_PI:
2028	case AHCI_VS:
2029	case AHCI_CAP2:
2030		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2031		break;
2032	case AHCI_GHC:
2033		if (value & AHCI_GHC_HR)
2034			ahci_reset(sc);
2035		else if (value & AHCI_GHC_IE) {
2036			sc->ghc |= AHCI_GHC_IE;
2037			ahci_generate_intr(sc);
2038		}
2039		break;
2040	case AHCI_IS:
2041		sc->is &= ~value;
2042		ahci_generate_intr(sc);
2043		break;
2044	default:
2045		break;
2046	}
2047}
2048
2049static void
2050pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2051		int baridx, uint64_t offset, int size, uint64_t value)
2052{
2053	struct pci_ahci_softc *sc = pi->pi_arg;
2054
2055	assert(baridx == 5);
2056	assert(size == 4);
2057
2058	pthread_mutex_lock(&sc->mtx);
2059
2060	if (offset < AHCI_OFFSET)
2061		pci_ahci_host_write(sc, offset, value);
2062	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2063		pci_ahci_port_write(sc, offset, value);
2064	else
2065		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2066
2067	pthread_mutex_unlock(&sc->mtx);
2068}
2069
2070static uint64_t
2071pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2072{
2073	uint32_t value;
2074
2075	switch (offset) {
2076	case AHCI_CAP:
2077	case AHCI_GHC:
2078	case AHCI_IS:
2079	case AHCI_PI:
2080	case AHCI_VS:
2081	case AHCI_CCCC:
2082	case AHCI_CCCP:
2083	case AHCI_EM_LOC:
2084	case AHCI_EM_CTL:
2085	case AHCI_CAP2:
2086	{
2087		uint32_t *p = &sc->cap;
2088		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2089		value = *p;
2090		break;
2091	}
2092	default:
2093		value = 0;
2094		break;
2095	}
2096	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2097		offset, value);
2098
2099	return (value);
2100}
2101
2102static uint64_t
2103pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2104{
2105	uint32_t value;
2106	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2107	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2108
2109	switch (offset) {
2110	case AHCI_P_CLB:
2111	case AHCI_P_CLBU:
2112	case AHCI_P_FB:
2113	case AHCI_P_FBU:
2114	case AHCI_P_IS:
2115	case AHCI_P_IE:
2116	case AHCI_P_CMD:
2117	case AHCI_P_TFD:
2118	case AHCI_P_SIG:
2119	case AHCI_P_SSTS:
2120	case AHCI_P_SCTL:
2121	case AHCI_P_SERR:
2122	case AHCI_P_SACT:
2123	case AHCI_P_CI:
2124	case AHCI_P_SNTF:
2125	case AHCI_P_FBS:
2126	{
2127		uint32_t *p= &sc->port[port].clb;
2128		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2129		value = *p;
2130		break;
2131	}
2132	default:
2133		value = 0;
2134		break;
2135	}
2136
2137	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2138		port, offset, value);
2139
2140	return value;
2141}
2142
2143static uint64_t
2144pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2145    uint64_t offset, int size)
2146{
2147	struct pci_ahci_softc *sc = pi->pi_arg;
2148	uint32_t value;
2149
2150	assert(baridx == 5);
2151	assert(size == 4);
2152
2153	pthread_mutex_lock(&sc->mtx);
2154
2155	if (offset < AHCI_OFFSET)
2156		value = pci_ahci_host_read(sc, offset);
2157	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2158		value = pci_ahci_port_read(sc, offset);
2159	else {
2160		value = 0;
2161		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2162	}
2163
2164	pthread_mutex_unlock(&sc->mtx);
2165
2166	return (value);
2167}
2168
2169static int
2170pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2171{
2172	char bident[sizeof("XX:X:X")];
2173	struct blockif_ctxt *bctxt;
2174	struct pci_ahci_softc *sc;
2175	int ret, slots;
2176	MD5_CTX mdctx;
2177	u_char digest[16];
2178
2179	ret = 0;
2180
2181	if (opts == NULL) {
2182		fprintf(stderr, "pci_ahci: backing device required\n");
2183		return (1);
2184	}
2185
2186#ifdef AHCI_DEBUG
2187	dbg = fopen("/tmp/log", "w+");
2188#endif
2189
2190	sc = calloc(1, sizeof(struct pci_ahci_softc));
2191	pi->pi_arg = sc;
2192	sc->asc_pi = pi;
2193	sc->ports = MAX_PORTS;
2194
2195	/*
2196	 * Only use port 0 for a backing device. All other ports will be
2197	 * marked as unused
2198	 */
2199	sc->port[0].atapi = atapi;
2200
2201	/*
2202	 * Attempt to open the backing image. Use the PCI
2203	 * slot/func for the identifier string.
2204	 */
2205	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2206	bctxt = blockif_open(opts, bident);
2207	if (bctxt == NULL) {
2208		ret = 1;
2209		goto open_fail;
2210	}
2211	sc->port[0].bctx = bctxt;
2212	sc->port[0].pr_sc = sc;
2213
2214	/*
2215	 * Create an identifier for the backing file. Use parts of the
2216	 * md5 sum of the filename
2217	 */
2218	MD5Init(&mdctx);
2219	MD5Update(&mdctx, opts, strlen(opts));
2220	MD5Final(digest, &mdctx);
2221	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2222	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2223
2224	/*
2225	 * Allocate blockif request structures and add them
2226	 * to the free list
2227	 */
2228	pci_ahci_ioreq_init(&sc->port[0]);
2229
2230	pthread_mutex_init(&sc->mtx, NULL);
2231
2232	/* Intel ICH8 AHCI */
2233	slots = sc->port[0].ioqsz;
2234	if (slots > 32)
2235		slots = 32;
2236	--slots;
2237	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2238	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2239	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2240	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2241	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2242
2243	/* Only port 0 implemented */
2244	sc->pi = 1;
2245	sc->vs = 0x10300;
2246	sc->cap2 = AHCI_CAP2_APST;
2247	ahci_reset(sc);
2248
2249	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2250	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2251	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2252	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2253	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2254	pci_emul_add_msicap(pi, 1);
2255	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2256	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2257
2258	pci_lintr_request(pi);
2259
2260open_fail:
2261	if (ret) {
2262		blockif_close(sc->port[0].bctx);
2263		free(sc);
2264	}
2265
2266	return (ret);
2267}
2268
/*
 * Init hook for the "ahci-hd" emulation: create the controller with the
 * ATAPI flag cleared on its port.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2275
/*
 * Init hook for the "ahci-cd" emulation: create the controller with the
 * ATAPI flag set on its port.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2282
2283/*
2284 * Use separate emulation names to distinguish drive and atapi devices
2285 */
2286struct pci_devemu pci_de_ahci_hd = {
2287	.pe_emu =	"ahci-hd",
2288	.pe_init =	pci_ahci_hd_init,
2289	.pe_barwrite =	pci_ahci_write,
2290	.pe_barread =	pci_ahci_read
2291};
2292PCI_EMUL_SET(pci_de_ahci_hd);
2293
2294struct pci_devemu pci_de_ahci_cd = {
2295	.pe_emu =	"ahci-cd",
2296	.pe_init =	pci_ahci_atapi_init,
2297	.pe_barwrite =	pci_ahci_write,
2298	.pe_barread =	pci_ahci_read
2299};
2300PCI_EMUL_SET(pci_de_ahci_cd);
2301