pci_ahci.c revision 282308
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 282308 2015-05-01 17:33:17Z mav $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 282308 2015-05-01 17:33:17Z mav $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <pthread_np.h>
52#include <inttypes.h>
53#include <md5.h>
54
55#include "bhyverun.h"
56#include "pci_emul.h"
57#include "ahci.h"
58#include "block_if.h"
59
/* Upper bound on emulated ports; sizes the port[] array in pci_ahci_softc. */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Device signatures that ahci_port_reset() stores in a port's 'sig' field. */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
64
/*
 * FIS type codes.  The ahci_write_fis_*() helpers store the matching
 * value in byte 0 of the FIS they build, and ahci_write_fis() uses the
 * type to pick where in the received-FIS area the FIS is copied.  Only
 * REGD2H, SETDEVBITS and PIOSETUP are accepted by ahci_write_fis().
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
75
/*
 * SCSI opcodes
 *
 * These are compared against the first byte of the ATAPI command
 * packet, which the handlers in this file locate at cfis + 0x40.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): judging by the names these look like SET FEATURES
 * sub-command values rather than command opcodes -- confirm against
 * the code that consumes them.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
106
/*
 * Debug printf
 *
 * DPRINTF() writes to the 'dbg' stream and flushes it when AHCI_DEBUG
 * is defined; otherwise it expands to nothing, so its arguments are
 * not evaluated.  WPRINTF() always prints to stdout via printf().
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
117
/*
 * Per-request bookkeeping wrapped around one blockif request.  Entries
 * live on a port's free list (iofhd) until a command handler claims
 * one, then sit on the busy list (iobhd) while the request is pending.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to blockif_*() */
	struct ahci_port *io_pr;	/* presumably the owning port -- TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;			/* command FIS of the command served */
	uint32_t len;			/* total length of the command, bytes */
	uint32_t done;			/* bytes already covered by requests */
	int slot;			/* command slot the request belongs to */
	int more;			/* nonzero if further requests are needed */
};
129
/*
 * State of one emulated port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* controller this port belongs to */
	uint8_t *cmd_lst;		/* command list, indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
	char ident[20 + 1];		/* string reported at identify words 10.. */
	int atapi;			/* nonzero for an ATAPI device */
	int reset;
	int mult_sectors;		/* reported in identify word 59 */
	uint8_t xfermode;		/* current transfer mode (ATA_UDMA6 after reset) */
	uint8_t err_cfis[20];		/* error record returned by the read-log handler */
	uint8_t sense_key;		/* sense key recorded on ATAPI errors */
	uint8_t asc;			/* additional sense code recorded with it */
	uint32_t pending;		/* bitmask of slots with work in flight */

	/*
	 * NOTE(review): the fields below appear to mirror the per-port
	 * AHCI registers -- confirm against the MMIO read/write handlers.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* requests in flight */
};
171
/*
 * One command-list entry as laid out in guest memory; the handlers
 * locate it at cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries that follow the CFIS */
	uint32_t prdbc;		/* bytes transferred; filled in by write_prdt() */
	uint64_t ctba;
	uint32_t reserved[4];
};
179
/*
 * One PRDT entry; the table starts at cfis + 0x80.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer length in bytes */
};
186
/*
 * Controller-wide state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance (interrupts, vm context) */
	pthread_mutex_t	mtx;		/* ahci_port_stop() asserts it is held */
	int ports;			/* number of entries of port[] in use */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
	uint32_t is;			/* one bit per port with an interrupt latched */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* nonzero while the pin interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
206
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * The address is first biased by 150 frames; there are 75 frames per
 * second and 60 seconds per minute.  buf[0], buf[1] and buf[2] receive
 * the minute, second and frame respectively.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
214
215/*
216 * generate HBA intr depending on whether or not ports within
217 * the controller have an interrupt pending.
218 */
219static void
220ahci_generate_intr(struct pci_ahci_softc *sc)
221{
222	struct pci_devinst *pi;
223	int i;
224
225	pi = sc->asc_pi;
226
227	for (i = 0; i < sc->ports; i++) {
228		struct ahci_port *pr;
229		pr = &sc->port[i];
230		if (pr->is & pr->ie)
231			sc->is |= (1 << i);
232	}
233
234	DPRINTF("%s %x\n", __func__, sc->is);
235
236	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
237		if (pci_msi_enabled(pi)) {
238			/*
239			 * Generate an MSI interrupt on every edge
240			 */
241			pci_generate_msi(pi, 0);
242		} else if (!sc->lintr) {
243			/*
244			 * Only generate a pin-based interrupt if one wasn't
245			 * in progress
246			 */
247			sc->lintr = 1;
248			pci_lintr_assert(pi);
249		}
250	} else if (sc->lintr) {
251		/*
252		 * No interrupts: deassert pin-based signal if it had
253		 * been asserted
254		 */
255		pci_lintr_deassert(pi);
256		sc->lintr = 0;
257	}
258}
259
260static void
261ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
262{
263	int offset, len, irq;
264
265	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
266		return;
267
268	switch (ft) {
269	case FIS_TYPE_REGD2H:
270		offset = 0x40;
271		len = 20;
272		irq = AHCI_P_IX_DHR;
273		break;
274	case FIS_TYPE_SETDEVBITS:
275		offset = 0x58;
276		len = 8;
277		irq = AHCI_P_IX_SDB;
278		break;
279	case FIS_TYPE_PIOSETUP:
280		offset = 0x20;
281		len = 20;
282		irq = 0;
283		break;
284	default:
285		WPRINTF("unsupported fis type %d\n", ft);
286		return;
287	}
288	memcpy(p->rfis + offset, fis, len);
289	if (irq) {
290		p->is |= irq;
291		ahci_generate_intr(p->pr_sc);
292	}
293}
294
295static void
296ahci_write_fis_piosetup(struct ahci_port *p)
297{
298	uint8_t fis[20];
299
300	memset(fis, 0, sizeof(fis));
301	fis[0] = FIS_TYPE_PIOSETUP;
302	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
303}
304
305static void
306ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
307{
308	uint8_t fis[8];
309	uint8_t error;
310
311	error = (tfd >> 8) & 0xff;
312	memset(fis, 0, sizeof(fis));
313	fis[0] = FIS_TYPE_SETDEVBITS;
314	fis[1] = (1 << 6);
315	fis[2] = tfd & 0x77;
316	fis[3] = error;
317	if (fis[2] & ATA_S_ERROR) {
318		p->is |= AHCI_P_IX_TFE;
319		p->err_cfis[0] = slot;
320		p->err_cfis[2] = tfd & 0x77;
321		p->err_cfis[3] = error;
322		memcpy(&p->err_cfis[4], cfis + 4, 16);
323	} else {
324		*(uint32_t *)(fis + 4) = (1 << slot);
325		p->sact &= ~(1 << slot);
326	}
327	p->tfd = tfd;
328	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
329}
330
331static void
332ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
333{
334	uint8_t fis[20];
335	uint8_t error;
336
337	error = (tfd >> 8) & 0xff;
338	memset(fis, 0, sizeof(fis));
339	fis[0] = FIS_TYPE_REGD2H;
340	fis[1] = (1 << 6);
341	fis[2] = tfd & 0xff;
342	fis[3] = error;
343	fis[4] = cfis[4];
344	fis[5] = cfis[5];
345	fis[6] = cfis[6];
346	fis[7] = cfis[7];
347	fis[8] = cfis[8];
348	fis[9] = cfis[9];
349	fis[10] = cfis[10];
350	fis[11] = cfis[11];
351	fis[12] = cfis[12];
352	fis[13] = cfis[13];
353	if (fis[2] & ATA_S_ERROR) {
354		p->is |= AHCI_P_IX_TFE;
355		p->err_cfis[0] = 0x80;
356		p->err_cfis[2] = tfd & 0xff;
357		p->err_cfis[3] = error;
358		memcpy(&p->err_cfis[4], cfis + 4, 16);
359	} else
360		p->ci &= ~(1 << slot);
361	p->tfd = tfd;
362	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363}
364
365static void
366ahci_write_reset_fis_d2h(struct ahci_port *p)
367{
368	uint8_t fis[20];
369
370	memset(fis, 0, sizeof(fis));
371	fis[0] = FIS_TYPE_REGD2H;
372	fis[3] = 1;
373	fis[4] = 1;
374	if (p->atapi) {
375		fis[5] = 0x14;
376		fis[6] = 0xeb;
377	}
378	fis[12] = 1;
379	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
380}
381
382static void
383ahci_check_stopped(struct ahci_port *p)
384{
385	/*
386	 * If we are no longer processing the command list and nothing
387	 * is in-flight, clear the running bit, the current command
388	 * slot, the command issue and active bits.
389	 */
390	if (!(p->cmd & AHCI_P_CMD_ST)) {
391		if (p->pending == 0) {
392			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
393			p->ci = 0;
394			p->sact = 0;
395		}
396	}
397}
398
399static void
400ahci_port_stop(struct ahci_port *p)
401{
402	struct ahci_ioreq *aior;
403	uint8_t *cfis;
404	int slot;
405	int ncq;
406	int error;
407
408	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
409
410	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
411		/*
412		 * Try to cancel the outstanding blockif request.
413		 */
414		error = blockif_cancel(p->bctx, &aior->io_req);
415		if (error != 0)
416			continue;
417
418		slot = aior->slot;
419		cfis = aior->cfis;
420		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
421		    cfis[2] == ATA_READ_FPDMA_QUEUED)
422			ncq = 1;
423
424		if (ncq)
425			p->sact &= ~(1 << slot);
426		else
427			p->ci &= ~(1 << slot);
428
429		/*
430		 * This command is now done.
431		 */
432		p->pending &= ~(1 << slot);
433
434		/*
435		 * Delete the blockif request from the busy list
436		 */
437		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
438
439		/*
440		 * Move the blockif request back to the free list
441		 */
442		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
443	}
444
445	ahci_check_stopped(p);
446}
447
448static void
449ahci_port_reset(struct ahci_port *pr)
450{
451	pr->serr = 0;
452	pr->sact = 0;
453	pr->xfermode = ATA_UDMA6;
454	pr->mult_sectors = 128;
455
456	if (!pr->bctx) {
457		pr->ssts = ATA_SS_DET_NO_DEVICE;
458		pr->sig = 0xFFFFFFFF;
459		pr->tfd = 0x7F;
460		return;
461	}
462	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
463	if (pr->sctl & ATA_SC_SPD_MASK)
464		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
465	else
466		pr->ssts |= ATA_SS_SPD_GEN3;
467	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
468	if (!pr->atapi) {
469		pr->sig = PxSIG_ATA;
470		pr->tfd |= ATA_S_READY;
471	} else
472		pr->sig = PxSIG_ATAPI;
473	ahci_write_reset_fis_d2h(pr);
474}
475
476static void
477ahci_reset(struct pci_ahci_softc *sc)
478{
479	int i;
480
481	sc->ghc = AHCI_GHC_AE;
482	sc->is = 0;
483
484	if (sc->lintr) {
485		pci_lintr_deassert(sc->asc_pi);
486		sc->lintr = 0;
487	}
488
489	for (i = 0; i < sc->ports; i++) {
490		sc->port[i].ie = 0;
491		sc->port[i].is = 0;
492		sc->port[i].sctl = 0;
493		ahci_port_reset(&sc->port[i]);
494	}
495}
496
/*
 * Fill a 'len'-byte field with 'src', padded with spaces, swapping the
 * two bytes of every pair (position i is stored at i ^ 1).  Copying
 * stops consuming 'src' at its terminating NUL.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
509
/*
 * Fill a 'len'-byte field with 'src' in natural byte order, padding
 * with spaces once the terminating NUL of 'src' is reached.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	int remaining = len;

	while (remaining-- > 0) {
		if (*src != '\0')
			*out++ = *src++;
		else
			*out++ = ' ';
	}
}
522
523/*
524 * Build up the iovec based on the PRDT, 'done' and 'len'.
525 */
526static void
527ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
528    struct ahci_prdt_entry *prdt, uint16_t prdtl)
529{
530	struct blockif_req *breq = &aior->io_req;
531	int i, j, skip, todo, left, extra;
532	uint32_t dbcsz;
533
534	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
535	skip = aior->done;
536	left = aior->len - aior->done;
537	todo = 0;
538	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
539	    i++, prdt++) {
540		dbcsz = (prdt->dbc & DBCMASK) + 1;
541		/* Skip already done part of the PRDT */
542		if (dbcsz <= skip) {
543			skip -= dbcsz;
544			continue;
545		}
546		dbcsz -= skip;
547		if (dbcsz > left)
548			dbcsz = left;
549		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
550		    prdt->dba + skip, dbcsz);
551		breq->br_iov[j].iov_len = dbcsz;
552		todo += dbcsz;
553		left -= dbcsz;
554		skip = 0;
555		j++;
556	}
557
558	/* If we got limited by IOV length, round I/O down to sector size. */
559	if (j == BLOCKIF_IOV_MAX) {
560		extra = todo % blockif_sectsz(p->bctx);
561		todo -= extra;
562		assert(todo > 0);
563		while (extra > 0) {
564			if (breq->br_iov[j - 1].iov_len > extra) {
565				breq->br_iov[j - 1].iov_len -= extra;
566				break;
567			}
568			extra -= breq->br_iov[j - 1].iov_len;
569			j--;
570		}
571	}
572
573	breq->br_iovcnt = j;
574	breq->br_resid = todo;
575	aior->done += todo;
576	aior->more = (aior->done < aior->len && i < prdtl);
577}
578
579static void
580ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
581{
582	struct ahci_ioreq *aior;
583	struct blockif_req *breq;
584	struct ahci_prdt_entry *prdt;
585	struct ahci_cmd_hdr *hdr;
586	uint64_t lba;
587	uint32_t len;
588	int err, ncq, readop;
589
590	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
591	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
592	ncq = 0;
593	readop = 1;
594
595	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
596	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
597	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
598	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
599		readop = 0;
600
601	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
602	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
603		lba = ((uint64_t)cfis[10] << 40) |
604			((uint64_t)cfis[9] << 32) |
605			((uint64_t)cfis[8] << 24) |
606			((uint64_t)cfis[6] << 16) |
607			((uint64_t)cfis[5] << 8) |
608			cfis[4];
609		len = cfis[11] << 8 | cfis[3];
610		if (!len)
611			len = 65536;
612		ncq = 1;
613	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
614	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
615	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
616		lba = ((uint64_t)cfis[10] << 40) |
617			((uint64_t)cfis[9] << 32) |
618			((uint64_t)cfis[8] << 24) |
619			((uint64_t)cfis[6] << 16) |
620			((uint64_t)cfis[5] << 8) |
621			cfis[4];
622		len = cfis[13] << 8 | cfis[12];
623		if (!len)
624			len = 65536;
625	} else {
626		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
627			(cfis[5] << 8) | cfis[4];
628		len = cfis[12];
629		if (!len)
630			len = 256;
631	}
632	lba *= blockif_sectsz(p->bctx);
633	len *= blockif_sectsz(p->bctx);
634
635	/* Pull request off free list */
636	aior = STAILQ_FIRST(&p->iofhd);
637	assert(aior != NULL);
638	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
639
640	aior->cfis = cfis;
641	aior->slot = slot;
642	aior->len = len;
643	aior->done = done;
644	breq = &aior->io_req;
645	breq->br_offset = lba + done;
646	ahci_build_iov(p, aior, prdt, hdr->prdtl);
647
648	/* Mark this command in-flight. */
649	p->pending |= 1 << slot;
650
651	/* Stuff request onto busy list. */
652	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
653
654	if (readop)
655		err = blockif_read(p->bctx, breq);
656	else
657		err = blockif_write(p->bctx, breq);
658	assert(err == 0);
659
660	if (ncq)
661		p->ci &= ~(1 << slot);
662}
663
664static void
665ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
666{
667	struct ahci_ioreq *aior;
668	struct blockif_req *breq;
669	int err;
670
671	/*
672	 * Pull request off free list
673	 */
674	aior = STAILQ_FIRST(&p->iofhd);
675	assert(aior != NULL);
676	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
677	aior->cfis = cfis;
678	aior->slot = slot;
679	aior->len = 0;
680	aior->done = 0;
681	aior->more = 0;
682	breq = &aior->io_req;
683
684	/*
685	 * Mark this command in-flight.
686	 */
687	p->pending |= 1 << slot;
688
689	/*
690	 * Stuff request onto busy list
691	 */
692	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
693
694	err = blockif_flush(p->bctx, breq);
695	assert(err == 0);
696}
697
698static inline void
699read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
700		void *buf, int size)
701{
702	struct ahci_cmd_hdr *hdr;
703	struct ahci_prdt_entry *prdt;
704	void *to;
705	int i, len;
706
707	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
708	len = size;
709	to = buf;
710	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
711	for (i = 0; i < hdr->prdtl && len; i++) {
712		uint8_t *ptr;
713		uint32_t dbcsz;
714		int sublen;
715
716		dbcsz = (prdt->dbc & DBCMASK) + 1;
717		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
718		sublen = len < dbcsz ? len : dbcsz;
719		memcpy(to, ptr, sublen);
720		len -= sublen;
721		to += sublen;
722		prdt++;
723	}
724}
725
726static void
727ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
728{
729	struct ahci_ioreq *aior;
730	struct blockif_req *breq;
731	uint8_t *entry;
732	uint64_t elba;
733	uint32_t len, elen;
734	int err;
735	uint8_t buf[512];
736
737	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
738		len = (uint16_t)cfis[13] << 8 | cfis[12];
739		len *= 512;
740	} else { /* ATA_SEND_FPDMA_QUEUED */
741		len = (uint16_t)cfis[11] << 8 | cfis[3];
742		len *= 512;
743	}
744	read_prdt(p, slot, cfis, buf, sizeof(buf));
745
746next:
747	entry = &buf[done];
748	elba = ((uint64_t)entry[5] << 40) |
749		((uint64_t)entry[4] << 32) |
750		((uint64_t)entry[3] << 24) |
751		((uint64_t)entry[2] << 16) |
752		((uint64_t)entry[1] << 8) |
753		entry[0];
754	elen = (uint16_t)entry[7] << 8 | entry[6];
755	done += 8;
756	if (elen == 0) {
757		if (done >= len) {
758			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
759			p->pending &= ~(1 << slot);
760			ahci_check_stopped(p);
761			return;
762		}
763		goto next;
764	}
765
766	/*
767	 * Pull request off free list
768	 */
769	aior = STAILQ_FIRST(&p->iofhd);
770	assert(aior != NULL);
771	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
772	aior->cfis = cfis;
773	aior->slot = slot;
774	aior->len = len;
775	aior->done = done;
776	aior->more = (len != done);
777
778	breq = &aior->io_req;
779	breq->br_offset = elba * blockif_sectsz(p->bctx);
780	breq->br_resid = elen * blockif_sectsz(p->bctx);
781
782	/*
783	 * Mark this command in-flight.
784	 */
785	p->pending |= 1 << slot;
786
787	/*
788	 * Stuff request onto busy list
789	 */
790	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
791
792	err = blockif_delete(p->bctx, breq);
793	assert(err == 0);
794}
795
796static inline void
797write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
798		void *buf, int size)
799{
800	struct ahci_cmd_hdr *hdr;
801	struct ahci_prdt_entry *prdt;
802	void *from;
803	int i, len;
804
805	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
806	len = size;
807	from = buf;
808	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
809	for (i = 0; i < hdr->prdtl && len; i++) {
810		uint8_t *ptr;
811		uint32_t dbcsz;
812		int sublen;
813
814		dbcsz = (prdt->dbc & DBCMASK) + 1;
815		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
816		sublen = len < dbcsz ? len : dbcsz;
817		memcpy(ptr, from, sublen);
818		len -= sublen;
819		from += sublen;
820		prdt++;
821	}
822	hdr->prdbc = size - len;
823}
824
/*
 * Store a checksum in the last byte of 'buf' so that all 'size' bytes
 * sum to zero modulo 256.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int last = size - 1;
	int idx = 0;
	uint8_t total = 0;

	while (idx < last)
		total += buf[idx++];
	buf[last] = 0x100 - total;
}
835
836static void
837ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
838{
839	struct ahci_cmd_hdr *hdr;
840	uint8_t buf[512];
841
842	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
843	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
844	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
845		ahci_write_fis_d2h(p, slot, cfis,
846		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
847		return;
848	}
849
850	memset(buf, 0, sizeof(buf));
851	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
852	ahci_checksum(buf, sizeof(buf));
853
854	if (cfis[2] == ATA_READ_LOG_EXT)
855		ahci_write_fis_piosetup(p);
856	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
857	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
858}
859
860static void
861handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
862{
863	struct ahci_cmd_hdr *hdr;
864
865	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
866	if (p->atapi || hdr->prdtl == 0) {
867		ahci_write_fis_d2h(p, slot, cfis,
868		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
869	} else {
870		uint16_t buf[256];
871		uint64_t sectors;
872		int sectsz, psectsz, psectoff, candelete, ro;
873		uint16_t cyl;
874		uint8_t sech, heads;
875
876		ro = blockif_is_ro(p->bctx);
877		candelete = blockif_candelete(p->bctx);
878		sectsz = blockif_sectsz(p->bctx);
879		sectors = blockif_size(p->bctx) / sectsz;
880		blockif_chs(p->bctx, &cyl, &heads, &sech);
881		blockif_psectsz(p->bctx, &psectsz, &psectoff);
882		memset(buf, 0, sizeof(buf));
883		buf[0] = 0x0040;
884		buf[1] = cyl;
885		buf[3] = heads;
886		buf[6] = sech;
887		ata_string((uint8_t *)(buf+10), p->ident, 20);
888		ata_string((uint8_t *)(buf+23), "001", 8);
889		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
890		buf[47] = (0x8000 | 128);
891		buf[48] = 0x1;
892		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
893		buf[50] = (1 << 14);
894		buf[53] = (1 << 1 | 1 << 2);
895		if (p->mult_sectors)
896			buf[59] = (0x100 | p->mult_sectors);
897		if (sectors <= 0x0fffffff) {
898			buf[60] = sectors;
899			buf[61] = (sectors >> 16);
900		} else {
901			buf[60] = 0xffff;
902			buf[61] = 0x0fff;
903		}
904		buf[63] = 0x7;
905		if (p->xfermode & ATA_WDMA0)
906			buf[63] |= (1 << ((p->xfermode & 7) + 8));
907		buf[64] = 0x3;
908		buf[65] = 120;
909		buf[66] = 120;
910		buf[67] = 120;
911		buf[68] = 120;
912		buf[69] = 0;
913		buf[75] = 31;
914		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
915			   ATA_SUPPORT_NCQ);
916		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
917			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
918		buf[80] = 0x3f0;
919		buf[81] = 0x28;
920		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
921			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
922		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
923			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
924		buf[84] = (1 << 14);
925		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
926			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
927		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
928			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
929		buf[87] = (1 << 14);
930		buf[88] = 0x7f;
931		if (p->xfermode & ATA_UDMA0)
932			buf[88] |= (1 << ((p->xfermode & 7) + 8));
933		buf[100] = sectors;
934		buf[101] = (sectors >> 16);
935		buf[102] = (sectors >> 32);
936		buf[103] = (sectors >> 48);
937		if (candelete && !ro) {
938			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
939			buf[105] = 1;
940			buf[169] = ATA_SUPPORT_DSM_TRIM;
941		}
942		buf[106] = 0x4000;
943		buf[209] = 0x4000;
944		if (psectsz > sectsz) {
945			buf[106] |= 0x2000;
946			buf[106] |= ffsl(psectsz / sectsz) - 1;
947			buf[209] |= (psectoff / sectsz);
948		}
949		if (sectsz > 512) {
950			buf[106] |= 0x1000;
951			buf[117] = sectsz / 2;
952			buf[118] = ((sectsz / 2) >> 16);
953		}
954		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
955		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
956		buf[222] = 0x1020;
957		buf[255] = 0x00a5;
958		ahci_checksum((uint8_t *)buf, sizeof(buf));
959		ahci_write_fis_piosetup(p);
960		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
961		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
962	}
963}
964
965static void
966handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
967{
968	if (!p->atapi) {
969		ahci_write_fis_d2h(p, slot, cfis,
970		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
971	} else {
972		uint16_t buf[256];
973
974		memset(buf, 0, sizeof(buf));
975		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
976		ata_string((uint8_t *)(buf+10), p->ident, 20);
977		ata_string((uint8_t *)(buf+23), "001", 8);
978		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
979		buf[49] = (1 << 9 | 1 << 8);
980		buf[50] = (1 << 14 | 1);
981		buf[53] = (1 << 2 | 1 << 1);
982		buf[62] = 0x3f;
983		buf[63] = 7;
984		if (p->xfermode & ATA_WDMA0)
985			buf[63] |= (1 << ((p->xfermode & 7) + 8));
986		buf[64] = 3;
987		buf[65] = 120;
988		buf[66] = 120;
989		buf[67] = 120;
990		buf[68] = 120;
991		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
992		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
993		buf[78] = (1 << 5);
994		buf[80] = 0x3f0;
995		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
996			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
997		buf[83] = (1 << 14);
998		buf[84] = (1 << 14);
999		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1000			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1001		buf[87] = (1 << 14);
1002		buf[88] = 0x7f;
1003		if (p->xfermode & ATA_UDMA0)
1004			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1005		buf[222] = 0x1020;
1006		buf[255] = 0x00a5;
1007		ahci_checksum((uint8_t *)buf, sizeof(buf));
1008		ahci_write_fis_piosetup(p);
1009		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1010		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1011	}
1012}
1013
1014static void
1015atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1016{
1017	uint8_t buf[36];
1018	uint8_t *acmd;
1019	int len;
1020	uint32_t tfd;
1021
1022	acmd = cfis + 0x40;
1023
1024	if (acmd[1] & 1) {		/* VPD */
1025		if (acmd[2] == 0) {	/* Supported VPD pages */
1026			buf[0] = 0x05;
1027			buf[1] = 0;
1028			buf[2] = 0;
1029			buf[3] = 1;
1030			buf[4] = 0;
1031			len = 4 + buf[3];
1032		} else {
1033			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1034			p->asc = 0x24;
1035			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1036			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1037			ahci_write_fis_d2h(p, slot, cfis, tfd);
1038			return;
1039		}
1040	} else {
1041		buf[0] = 0x05;
1042		buf[1] = 0x80;
1043		buf[2] = 0x00;
1044		buf[3] = 0x21;
1045		buf[4] = 31;
1046		buf[5] = 0;
1047		buf[6] = 0;
1048		buf[7] = 0;
1049		atapi_string(buf + 8, "BHYVE", 8);
1050		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1051		atapi_string(buf + 32, "001", 4);
1052		len = sizeof(buf);
1053	}
1054
1055	if (len > acmd[4])
1056		len = acmd[4];
1057	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1058	write_prdt(p, slot, cfis, buf, len);
1059	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1060}
1061
1062static void
1063atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1064{
1065	uint8_t buf[8];
1066	uint64_t sectors;
1067
1068	sectors = blockif_size(p->bctx) / 2048;
1069	be32enc(buf, sectors - 1);
1070	be32enc(buf + 4, 2048);
1071	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1072	write_prdt(p, slot, cfis, buf, sizeof(buf));
1073	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1074}
1075
1076static void
1077atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1078{
1079	uint8_t *acmd;
1080	uint8_t format;
1081	int len;
1082
1083	acmd = cfis + 0x40;
1084
1085	len = be16dec(acmd + 7);
1086	format = acmd[9] >> 6;
1087	switch (format) {
1088	case 0:
1089	{
1090		int msf, size;
1091		uint64_t sectors;
1092		uint8_t start_track, buf[20], *bp;
1093
1094		msf = (acmd[1] >> 1) & 1;
1095		start_track = acmd[6];
1096		if (start_track > 1 && start_track != 0xaa) {
1097			uint32_t tfd;
1098			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1099			p->asc = 0x24;
1100			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1101			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1102			ahci_write_fis_d2h(p, slot, cfis, tfd);
1103			return;
1104		}
1105		bp = buf + 2;
1106		*bp++ = 1;
1107		*bp++ = 1;
1108		if (start_track <= 1) {
1109			*bp++ = 0;
1110			*bp++ = 0x14;
1111			*bp++ = 1;
1112			*bp++ = 0;
1113			if (msf) {
1114				*bp++ = 0;
1115				lba_to_msf(bp, 0);
1116				bp += 3;
1117			} else {
1118				*bp++ = 0;
1119				*bp++ = 0;
1120				*bp++ = 0;
1121				*bp++ = 0;
1122			}
1123		}
1124		*bp++ = 0;
1125		*bp++ = 0x14;
1126		*bp++ = 0xaa;
1127		*bp++ = 0;
1128		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1129		sectors >>= 2;
1130		if (msf) {
1131			*bp++ = 0;
1132			lba_to_msf(bp, sectors);
1133			bp += 3;
1134		} else {
1135			be32enc(bp, sectors);
1136			bp += 4;
1137		}
1138		size = bp - buf;
1139		be16enc(buf, size - 2);
1140		if (len > size)
1141			len = size;
1142		write_prdt(p, slot, cfis, buf, len);
1143		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1144		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1145		break;
1146	}
1147	case 1:
1148	{
1149		uint8_t buf[12];
1150
1151		memset(buf, 0, sizeof(buf));
1152		buf[1] = 0xa;
1153		buf[2] = 0x1;
1154		buf[3] = 0x1;
1155		if (len > sizeof(buf))
1156			len = sizeof(buf);
1157		write_prdt(p, slot, cfis, buf, len);
1158		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1159		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1160		break;
1161	}
1162	case 2:
1163	{
1164		int msf, size;
1165		uint64_t sectors;
1166		uint8_t start_track, *bp, buf[50];
1167
1168		msf = (acmd[1] >> 1) & 1;
1169		start_track = acmd[6];
1170		bp = buf + 2;
1171		*bp++ = 1;
1172		*bp++ = 1;
1173
1174		*bp++ = 1;
1175		*bp++ = 0x14;
1176		*bp++ = 0;
1177		*bp++ = 0xa0;
1178		*bp++ = 0;
1179		*bp++ = 0;
1180		*bp++ = 0;
1181		*bp++ = 0;
1182		*bp++ = 1;
1183		*bp++ = 0;
1184		*bp++ = 0;
1185
1186		*bp++ = 1;
1187		*bp++ = 0x14;
1188		*bp++ = 0;
1189		*bp++ = 0xa1;
1190		*bp++ = 0;
1191		*bp++ = 0;
1192		*bp++ = 0;
1193		*bp++ = 0;
1194		*bp++ = 1;
1195		*bp++ = 0;
1196		*bp++ = 0;
1197
1198		*bp++ = 1;
1199		*bp++ = 0x14;
1200		*bp++ = 0;
1201		*bp++ = 0xa2;
1202		*bp++ = 0;
1203		*bp++ = 0;
1204		*bp++ = 0;
1205		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1206		sectors >>= 2;
1207		if (msf) {
1208			*bp++ = 0;
1209			lba_to_msf(bp, sectors);
1210			bp += 3;
1211		} else {
1212			be32enc(bp, sectors);
1213			bp += 4;
1214		}
1215
1216		*bp++ = 1;
1217		*bp++ = 0x14;
1218		*bp++ = 0;
1219		*bp++ = 1;
1220		*bp++ = 0;
1221		*bp++ = 0;
1222		*bp++ = 0;
1223		if (msf) {
1224			*bp++ = 0;
1225			lba_to_msf(bp, 0);
1226			bp += 3;
1227		} else {
1228			*bp++ = 0;
1229			*bp++ = 0;
1230			*bp++ = 0;
1231			*bp++ = 0;
1232		}
1233
1234		size = bp - buf;
1235		be16enc(buf, size - 2);
1236		if (len > size)
1237			len = size;
1238		write_prdt(p, slot, cfis, buf, len);
1239		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1240		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1241		break;
1242	}
1243	default:
1244	{
1245		uint32_t tfd;
1246
1247		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1248		p->asc = 0x24;
1249		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1250		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1251		ahci_write_fis_d2h(p, slot, cfis, tfd);
1252		break;
1253	}
1254	}
1255}
1256
1257static void
1258atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1259{
1260	uint8_t buf[16];
1261
1262	memset(buf, 0, sizeof(buf));
1263	buf[3] = 8;
1264
1265	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1266	write_prdt(p, slot, cfis, buf, sizeof(buf));
1267	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1268}
1269
1270static void
1271atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1272{
1273	struct ahci_ioreq *aior;
1274	struct ahci_cmd_hdr *hdr;
1275	struct ahci_prdt_entry *prdt;
1276	struct blockif_req *breq;
1277	struct pci_ahci_softc *sc;
1278	uint8_t *acmd;
1279	uint64_t lba;
1280	uint32_t len;
1281	int err;
1282
1283	sc = p->pr_sc;
1284	acmd = cfis + 0x40;
1285	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1286	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1287
1288	lba = be32dec(acmd + 2);
1289	if (acmd[0] == READ_10)
1290		len = be16dec(acmd + 7);
1291	else
1292		len = be32dec(acmd + 6);
1293	if (len == 0) {
1294		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1295		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1296	}
1297	lba *= 2048;
1298	len *= 2048;
1299
1300	/*
1301	 * Pull request off free list
1302	 */
1303	aior = STAILQ_FIRST(&p->iofhd);
1304	assert(aior != NULL);
1305	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1306	aior->cfis = cfis;
1307	aior->slot = slot;
1308	aior->len = len;
1309	aior->done = done;
1310	breq = &aior->io_req;
1311	breq->br_offset = lba + done;
1312	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1313
1314	/* Mark this command in-flight. */
1315	p->pending |= 1 << slot;
1316
1317	/* Stuff request onto busy list. */
1318	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1319
1320	err = blockif_read(p->bctx, breq);
1321	assert(err == 0);
1322}
1323
1324static void
1325atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1326{
1327	uint8_t buf[64];
1328	uint8_t *acmd;
1329	int len;
1330
1331	acmd = cfis + 0x40;
1332	len = acmd[4];
1333	if (len > sizeof(buf))
1334		len = sizeof(buf);
1335	memset(buf, 0, len);
1336	buf[0] = 0x70 | (1 << 7);
1337	buf[2] = p->sense_key;
1338	buf[7] = 10;
1339	buf[12] = p->asc;
1340	write_prdt(p, slot, cfis, buf, len);
1341	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1342	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1343}
1344
1345static void
1346atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1347{
1348	uint8_t *acmd = cfis + 0x40;
1349	uint32_t tfd;
1350
1351	switch (acmd[4] & 3) {
1352	case 0:
1353	case 1:
1354	case 3:
1355		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1356		tfd = ATA_S_READY | ATA_S_DSC;
1357		break;
1358	case 2:
1359		/* TODO eject media */
1360		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1361		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1362		p->asc = 0x53;
1363		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1364		break;
1365	}
1366	ahci_write_fis_d2h(p, slot, cfis, tfd);
1367}
1368
1369static void
1370atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1371{
1372	uint8_t *acmd;
1373	uint32_t tfd;
1374	uint8_t pc, code;
1375	int len;
1376
1377	acmd = cfis + 0x40;
1378	len = be16dec(acmd + 7);
1379	pc = acmd[2] >> 6;
1380	code = acmd[2] & 0x3f;
1381
1382	switch (pc) {
1383	case 0:
1384		switch (code) {
1385		case MODEPAGE_RW_ERROR_RECOVERY:
1386		{
1387			uint8_t buf[16];
1388
1389			if (len > sizeof(buf))
1390				len = sizeof(buf);
1391
1392			memset(buf, 0, sizeof(buf));
1393			be16enc(buf, 16 - 2);
1394			buf[2] = 0x70;
1395			buf[8] = 0x01;
1396			buf[9] = 16 - 10;
1397			buf[11] = 0x05;
1398			write_prdt(p, slot, cfis, buf, len);
1399			tfd = ATA_S_READY | ATA_S_DSC;
1400			break;
1401		}
1402		case MODEPAGE_CD_CAPABILITIES:
1403		{
1404			uint8_t buf[30];
1405
1406			if (len > sizeof(buf))
1407				len = sizeof(buf);
1408
1409			memset(buf, 0, sizeof(buf));
1410			be16enc(buf, 30 - 2);
1411			buf[2] = 0x70;
1412			buf[8] = 0x2A;
1413			buf[9] = 30 - 10;
1414			buf[10] = 0x08;
1415			buf[12] = 0x71;
1416			be16enc(&buf[18], 2);
1417			be16enc(&buf[20], 512);
1418			write_prdt(p, slot, cfis, buf, len);
1419			tfd = ATA_S_READY | ATA_S_DSC;
1420			break;
1421		}
1422		default:
1423			goto error;
1424			break;
1425		}
1426		break;
1427	case 3:
1428		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1429		p->asc = 0x39;
1430		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1431		break;
1432error:
1433	case 1:
1434	case 2:
1435		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1436		p->asc = 0x24;
1437		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1438		break;
1439	}
1440	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1441	ahci_write_fis_d2h(p, slot, cfis, tfd);
1442}
1443
1444static void
1445atapi_get_event_status_notification(struct ahci_port *p, int slot,
1446    uint8_t *cfis)
1447{
1448	uint8_t *acmd;
1449	uint32_t tfd;
1450
1451	acmd = cfis + 0x40;
1452
1453	/* we don't support asynchronous operation */
1454	if (!(acmd[1] & 1)) {
1455		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1456		p->asc = 0x24;
1457		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1458	} else {
1459		uint8_t buf[8];
1460		int len;
1461
1462		len = be16dec(acmd + 7);
1463		if (len > sizeof(buf))
1464			len = sizeof(buf);
1465
1466		memset(buf, 0, sizeof(buf));
1467		be16enc(buf, 8 - 2);
1468		buf[2] = 0x04;
1469		buf[3] = 0x10;
1470		buf[5] = 0x02;
1471		write_prdt(p, slot, cfis, buf, len);
1472		tfd = ATA_S_READY | ATA_S_DSC;
1473	}
1474	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1475	ahci_write_fis_d2h(p, slot, cfis, tfd);
1476}
1477
1478static void
1479handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1480{
1481	uint8_t *acmd;
1482
1483	acmd = cfis + 0x40;
1484
1485#ifdef AHCI_DEBUG
1486	{
1487		int i;
1488		DPRINTF("ACMD:");
1489		for (i = 0; i < 16; i++)
1490			DPRINTF("%02x ", acmd[i]);
1491		DPRINTF("\n");
1492	}
1493#endif
1494
1495	switch (acmd[0]) {
1496	case TEST_UNIT_READY:
1497		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1498		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1499		break;
1500	case INQUIRY:
1501		atapi_inquiry(p, slot, cfis);
1502		break;
1503	case READ_CAPACITY:
1504		atapi_read_capacity(p, slot, cfis);
1505		break;
1506	case PREVENT_ALLOW:
1507		/* TODO */
1508		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1509		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1510		break;
1511	case READ_TOC:
1512		atapi_read_toc(p, slot, cfis);
1513		break;
1514	case REPORT_LUNS:
1515		atapi_report_luns(p, slot, cfis);
1516		break;
1517	case READ_10:
1518	case READ_12:
1519		atapi_read(p, slot, cfis, 0);
1520		break;
1521	case REQUEST_SENSE:
1522		atapi_request_sense(p, slot, cfis);
1523		break;
1524	case START_STOP_UNIT:
1525		atapi_start_stop_unit(p, slot, cfis);
1526		break;
1527	case MODE_SENSE_10:
1528		atapi_mode_sense(p, slot, cfis);
1529		break;
1530	case GET_EVENT_STATUS_NOTIFICATION:
1531		atapi_get_event_status_notification(p, slot, cfis);
1532		break;
1533	default:
1534		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1535		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1536		p->asc = 0x20;
1537		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1538				ATA_S_READY | ATA_S_ERROR);
1539		break;
1540	}
1541}
1542
1543static void
1544ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1545{
1546
1547	switch (cfis[2]) {
1548	case ATA_ATA_IDENTIFY:
1549		handle_identify(p, slot, cfis);
1550		break;
1551	case ATA_SETFEATURES:
1552	{
1553		switch (cfis[3]) {
1554		case ATA_SF_ENAB_SATA_SF:
1555			switch (cfis[12]) {
1556			case ATA_SATA_SF_AN:
1557				p->tfd = ATA_S_DSC | ATA_S_READY;
1558				break;
1559			default:
1560				p->tfd = ATA_S_ERROR | ATA_S_READY;
1561				p->tfd |= (ATA_ERROR_ABORT << 8);
1562				break;
1563			}
1564			break;
1565		case ATA_SF_ENAB_WCACHE:
1566		case ATA_SF_DIS_WCACHE:
1567		case ATA_SF_ENAB_RCACHE:
1568		case ATA_SF_DIS_RCACHE:
1569			p->tfd = ATA_S_DSC | ATA_S_READY;
1570			break;
1571		case ATA_SF_SETXFER:
1572		{
1573			switch (cfis[12] & 0xf8) {
1574			case ATA_PIO:
1575			case ATA_PIO0:
1576				break;
1577			case ATA_WDMA0:
1578			case ATA_UDMA0:
1579				p->xfermode = (cfis[12] & 0x7);
1580				break;
1581			}
1582			p->tfd = ATA_S_DSC | ATA_S_READY;
1583			break;
1584		}
1585		default:
1586			p->tfd = ATA_S_ERROR | ATA_S_READY;
1587			p->tfd |= (ATA_ERROR_ABORT << 8);
1588			break;
1589		}
1590		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1591		break;
1592	}
1593	case ATA_SET_MULTI:
1594		if (cfis[12] != 0 &&
1595			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1596			p->tfd = ATA_S_ERROR | ATA_S_READY;
1597			p->tfd |= (ATA_ERROR_ABORT << 8);
1598		} else {
1599			p->mult_sectors = cfis[12];
1600			p->tfd = ATA_S_DSC | ATA_S_READY;
1601		}
1602		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1603		break;
1604	case ATA_READ:
1605	case ATA_WRITE:
1606	case ATA_READ48:
1607	case ATA_WRITE48:
1608	case ATA_READ_MUL:
1609	case ATA_WRITE_MUL:
1610	case ATA_READ_MUL48:
1611	case ATA_WRITE_MUL48:
1612	case ATA_READ_DMA:
1613	case ATA_WRITE_DMA:
1614	case ATA_READ_DMA48:
1615	case ATA_WRITE_DMA48:
1616	case ATA_READ_FPDMA_QUEUED:
1617	case ATA_WRITE_FPDMA_QUEUED:
1618		ahci_handle_rw(p, slot, cfis, 0);
1619		break;
1620	case ATA_FLUSHCACHE:
1621	case ATA_FLUSHCACHE48:
1622		ahci_handle_flush(p, slot, cfis);
1623		break;
1624	case ATA_DATA_SET_MANAGEMENT:
1625		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1626		    cfis[13] == 0 && cfis[12] == 1) {
1627			ahci_handle_dsm_trim(p, slot, cfis, 0);
1628			break;
1629		}
1630		ahci_write_fis_d2h(p, slot, cfis,
1631		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1632		break;
1633	case ATA_SEND_FPDMA_QUEUED:
1634		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1635		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1636		    cfis[11] == 0 && cfis[13] == 1) {
1637			ahci_handle_dsm_trim(p, slot, cfis, 0);
1638			break;
1639		}
1640		ahci_write_fis_d2h(p, slot, cfis,
1641		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1642		break;
1643	case ATA_READ_LOG_EXT:
1644	case ATA_READ_LOG_DMA_EXT:
1645		ahci_handle_read_log(p, slot, cfis);
1646		break;
1647	case ATA_NOP:
1648		ahci_write_fis_d2h(p, slot, cfis,
1649		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1650		break;
1651	case ATA_STANDBY_CMD:
1652	case ATA_STANDBY_IMMEDIATE:
1653	case ATA_IDLE_CMD:
1654	case ATA_IDLE_IMMEDIATE:
1655	case ATA_SLEEP:
1656		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1657		break;
1658	case ATA_ATAPI_IDENTIFY:
1659		handle_atapi_identify(p, slot, cfis);
1660		break;
1661	case ATA_PACKET_CMD:
1662		if (!p->atapi) {
1663			ahci_write_fis_d2h(p, slot, cfis,
1664			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1665		} else
1666			handle_packet_cmd(p, slot, cfis);
1667		break;
1668	default:
1669		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1670		ahci_write_fis_d2h(p, slot, cfis,
1671		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1672		break;
1673	}
1674}
1675
1676static void
1677ahci_handle_slot(struct ahci_port *p, int slot)
1678{
1679	struct ahci_cmd_hdr *hdr;
1680	struct ahci_prdt_entry *prdt;
1681	struct pci_ahci_softc *sc;
1682	uint8_t *cfis;
1683	int cfl;
1684
1685	sc = p->pr_sc;
1686	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1687	cfl = (hdr->flags & 0x1f) * 4;
1688	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1689			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1690	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1691
1692#ifdef AHCI_DEBUG
1693	DPRINTF("\ncfis:");
1694	for (i = 0; i < cfl; i++) {
1695		if (i % 10 == 0)
1696			DPRINTF("\n");
1697		DPRINTF("%02x ", cfis[i]);
1698	}
1699	DPRINTF("\n");
1700
1701	for (i = 0; i < hdr->prdtl; i++) {
1702		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1703		prdt++;
1704	}
1705#endif
1706
1707	if (cfis[0] != FIS_TYPE_REGH2D) {
1708		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1709		return;
1710	}
1711
1712	if (cfis[1] & 0x80) {
1713		ahci_handle_cmd(p, slot, cfis);
1714	} else {
1715		if (cfis[15] & (1 << 2))
1716			p->reset = 1;
1717		else if (p->reset) {
1718			p->reset = 0;
1719			ahci_port_reset(p);
1720		}
1721		p->ci &= ~(1 << slot);
1722	}
1723}
1724
1725static void
1726ahci_handle_port(struct ahci_port *p)
1727{
1728	int i;
1729
1730	if (!(p->cmd & AHCI_P_CMD_ST))
1731		return;
1732
1733	/*
1734	 * Search for any new commands to issue ignoring those that
1735	 * are already in-flight.
1736	 */
1737	for (i = 0; (i < 32) && p->ci; i++) {
1738		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1739			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1740			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1741			ahci_handle_slot(p, i);
1742		}
1743	}
1744}
1745
1746/*
1747 * blockif callback routine - this runs in the context of the blockif
1748 * i/o thread, so the mutex needs to be acquired.
1749 */
1750static void
1751ata_ioreq_cb(struct blockif_req *br, int err)
1752{
1753	struct ahci_cmd_hdr *hdr;
1754	struct ahci_ioreq *aior;
1755	struct ahci_port *p;
1756	struct pci_ahci_softc *sc;
1757	uint32_t tfd;
1758	uint8_t *cfis;
1759	int slot, ncq, dsm;
1760
1761	DPRINTF("%s %d\n", __func__, err);
1762
1763	ncq = dsm = 0;
1764	aior = br->br_param;
1765	p = aior->io_pr;
1766	cfis = aior->cfis;
1767	slot = aior->slot;
1768	sc = p->pr_sc;
1769	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1770
1771	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1772	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1773	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1774		ncq = 1;
1775	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1776	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1777	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1778		dsm = 1;
1779
1780	pthread_mutex_lock(&sc->mtx);
1781
1782	/*
1783	 * Delete the blockif request from the busy list
1784	 */
1785	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1786
1787	/*
1788	 * Move the blockif request back to the free list
1789	 */
1790	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1791
1792	if (!err)
1793		hdr->prdbc = aior->done;
1794
1795	if (!err && aior->more) {
1796		if (dsm)
1797			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1798		else
1799			ahci_handle_rw(p, slot, cfis, aior->done);
1800		goto out;
1801	}
1802
1803	if (!err)
1804		tfd = ATA_S_READY | ATA_S_DSC;
1805	else
1806		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1807	if (ncq)
1808		ahci_write_fis_sdb(p, slot, cfis, tfd);
1809	else
1810		ahci_write_fis_d2h(p, slot, cfis, tfd);
1811
1812	/*
1813	 * This command is now complete.
1814	 */
1815	p->pending &= ~(1 << slot);
1816
1817	ahci_check_stopped(p);
1818out:
1819	pthread_mutex_unlock(&sc->mtx);
1820	DPRINTF("%s exit\n", __func__);
1821}
1822
1823static void
1824atapi_ioreq_cb(struct blockif_req *br, int err)
1825{
1826	struct ahci_cmd_hdr *hdr;
1827	struct ahci_ioreq *aior;
1828	struct ahci_port *p;
1829	struct pci_ahci_softc *sc;
1830	uint8_t *cfis;
1831	uint32_t tfd;
1832	int slot;
1833
1834	DPRINTF("%s %d\n", __func__, err);
1835
1836	aior = br->br_param;
1837	p = aior->io_pr;
1838	cfis = aior->cfis;
1839	slot = aior->slot;
1840	sc = p->pr_sc;
1841	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1842
1843	pthread_mutex_lock(&sc->mtx);
1844
1845	/*
1846	 * Delete the blockif request from the busy list
1847	 */
1848	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1849
1850	/*
1851	 * Move the blockif request back to the free list
1852	 */
1853	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1854
1855	if (!err)
1856		hdr->prdbc = aior->done;
1857
1858	if (!err && aior->more) {
1859		atapi_read(p, slot, cfis, aior->done);
1860		goto out;
1861	}
1862
1863	if (!err) {
1864		tfd = ATA_S_READY | ATA_S_DSC;
1865	} else {
1866		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1867		p->asc = 0x21;
1868		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1869	}
1870	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1871	ahci_write_fis_d2h(p, slot, cfis, tfd);
1872
1873	/*
1874	 * This command is now complete.
1875	 */
1876	p->pending &= ~(1 << slot);
1877
1878	ahci_check_stopped(p);
1879out:
1880	pthread_mutex_unlock(&sc->mtx);
1881	DPRINTF("%s exit\n", __func__);
1882}
1883
1884static void
1885pci_ahci_ioreq_init(struct ahci_port *pr)
1886{
1887	struct ahci_ioreq *vr;
1888	int i;
1889
1890	pr->ioqsz = blockif_queuesz(pr->bctx);
1891	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1892	STAILQ_INIT(&pr->iofhd);
1893
1894	/*
1895	 * Add all i/o request entries to the free queue
1896	 */
1897	for (i = 0; i < pr->ioqsz; i++) {
1898		vr = &pr->ioreq[i];
1899		vr->io_pr = pr;
1900		if (!pr->atapi)
1901			vr->io_req.br_callback = ata_ioreq_cb;
1902		else
1903			vr->io_req.br_callback = atapi_ioreq_cb;
1904		vr->io_req.br_param = vr;
1905		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1906	}
1907
1908	TAILQ_INIT(&pr->iobhd);
1909}
1910
1911static void
1912pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1913{
1914	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1915	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1916	struct ahci_port *p = &sc->port[port];
1917
1918	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1919		port, offset, value);
1920
1921	switch (offset) {
1922	case AHCI_P_CLB:
1923		p->clb = value;
1924		break;
1925	case AHCI_P_CLBU:
1926		p->clbu = value;
1927		break;
1928	case AHCI_P_FB:
1929		p->fb = value;
1930		break;
1931	case AHCI_P_FBU:
1932		p->fbu = value;
1933		break;
1934	case AHCI_P_IS:
1935		p->is &= ~value;
1936		break;
1937	case AHCI_P_IE:
1938		p->ie = value & 0xFDC000FF;
1939		ahci_generate_intr(sc);
1940		break;
1941	case AHCI_P_CMD:
1942	{
1943		p->cmd = value;
1944
1945		if (!(value & AHCI_P_CMD_ST)) {
1946			ahci_port_stop(p);
1947		} else {
1948			uint64_t clb;
1949
1950			p->cmd |= AHCI_P_CMD_CR;
1951			clb = (uint64_t)p->clbu << 32 | p->clb;
1952			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1953					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1954		}
1955
1956		if (value & AHCI_P_CMD_FRE) {
1957			uint64_t fb;
1958
1959			p->cmd |= AHCI_P_CMD_FR;
1960			fb = (uint64_t)p->fbu << 32 | p->fb;
1961			/* we don't support FBSCP, so rfis size is 256Bytes */
1962			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1963		} else {
1964			p->cmd &= ~AHCI_P_CMD_FR;
1965		}
1966
1967		if (value & AHCI_P_CMD_CLO) {
1968			p->tfd = 0;
1969			p->cmd &= ~AHCI_P_CMD_CLO;
1970		}
1971
1972		ahci_handle_port(p);
1973		break;
1974	}
1975	case AHCI_P_TFD:
1976	case AHCI_P_SIG:
1977	case AHCI_P_SSTS:
1978		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1979		break;
1980	case AHCI_P_SCTL:
1981		p->sctl = value;
1982		if (!(p->cmd & AHCI_P_CMD_ST)) {
1983			if (value & ATA_SC_DET_RESET)
1984				ahci_port_reset(p);
1985		}
1986		break;
1987	case AHCI_P_SERR:
1988		p->serr &= ~value;
1989		break;
1990	case AHCI_P_SACT:
1991		p->sact |= value;
1992		break;
1993	case AHCI_P_CI:
1994		p->ci |= value;
1995		ahci_handle_port(p);
1996		break;
1997	case AHCI_P_SNTF:
1998	case AHCI_P_FBS:
1999	default:
2000		break;
2001	}
2002}
2003
2004static void
2005pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2006{
2007	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2008		offset, value);
2009
2010	switch (offset) {
2011	case AHCI_CAP:
2012	case AHCI_PI:
2013	case AHCI_VS:
2014	case AHCI_CAP2:
2015		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2016		break;
2017	case AHCI_GHC:
2018		if (value & AHCI_GHC_HR)
2019			ahci_reset(sc);
2020		else if (value & AHCI_GHC_IE) {
2021			sc->ghc |= AHCI_GHC_IE;
2022			ahci_generate_intr(sc);
2023		}
2024		break;
2025	case AHCI_IS:
2026		sc->is &= ~value;
2027		ahci_generate_intr(sc);
2028		break;
2029	default:
2030		break;
2031	}
2032}
2033
2034static void
2035pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2036		int baridx, uint64_t offset, int size, uint64_t value)
2037{
2038	struct pci_ahci_softc *sc = pi->pi_arg;
2039
2040	assert(baridx == 5);
2041	assert(size == 4);
2042
2043	pthread_mutex_lock(&sc->mtx);
2044
2045	if (offset < AHCI_OFFSET)
2046		pci_ahci_host_write(sc, offset, value);
2047	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2048		pci_ahci_port_write(sc, offset, value);
2049	else
2050		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2051
2052	pthread_mutex_unlock(&sc->mtx);
2053}
2054
2055static uint64_t
2056pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2057{
2058	uint32_t value;
2059
2060	switch (offset) {
2061	case AHCI_CAP:
2062	case AHCI_GHC:
2063	case AHCI_IS:
2064	case AHCI_PI:
2065	case AHCI_VS:
2066	case AHCI_CCCC:
2067	case AHCI_CCCP:
2068	case AHCI_EM_LOC:
2069	case AHCI_EM_CTL:
2070	case AHCI_CAP2:
2071	{
2072		uint32_t *p = &sc->cap;
2073		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2074		value = *p;
2075		break;
2076	}
2077	default:
2078		value = 0;
2079		break;
2080	}
2081	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2082		offset, value);
2083
2084	return (value);
2085}
2086
2087static uint64_t
2088pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2089{
2090	uint32_t value;
2091	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2092	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2093
2094	switch (offset) {
2095	case AHCI_P_CLB:
2096	case AHCI_P_CLBU:
2097	case AHCI_P_FB:
2098	case AHCI_P_FBU:
2099	case AHCI_P_IS:
2100	case AHCI_P_IE:
2101	case AHCI_P_CMD:
2102	case AHCI_P_TFD:
2103	case AHCI_P_SIG:
2104	case AHCI_P_SSTS:
2105	case AHCI_P_SCTL:
2106	case AHCI_P_SERR:
2107	case AHCI_P_SACT:
2108	case AHCI_P_CI:
2109	case AHCI_P_SNTF:
2110	case AHCI_P_FBS:
2111	{
2112		uint32_t *p= &sc->port[port].clb;
2113		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2114		value = *p;
2115		break;
2116	}
2117	default:
2118		value = 0;
2119		break;
2120	}
2121
2122	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2123		port, offset, value);
2124
2125	return value;
2126}
2127
2128static uint64_t
2129pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2130    uint64_t offset, int size)
2131{
2132	struct pci_ahci_softc *sc = pi->pi_arg;
2133	uint32_t value;
2134
2135	assert(baridx == 5);
2136	assert(size == 4);
2137
2138	pthread_mutex_lock(&sc->mtx);
2139
2140	if (offset < AHCI_OFFSET)
2141		value = pci_ahci_host_read(sc, offset);
2142	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2143		value = pci_ahci_port_read(sc, offset);
2144	else {
2145		value = 0;
2146		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2147	}
2148
2149	pthread_mutex_unlock(&sc->mtx);
2150
2151	return (value);
2152}
2153
2154static int
2155pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2156{
2157	char bident[sizeof("XX:X:X")];
2158	struct blockif_ctxt *bctxt;
2159	struct pci_ahci_softc *sc;
2160	int ret, slots;
2161	MD5_CTX mdctx;
2162	u_char digest[16];
2163
2164	ret = 0;
2165
2166	if (opts == NULL) {
2167		fprintf(stderr, "pci_ahci: backing device required\n");
2168		return (1);
2169	}
2170
2171#ifdef AHCI_DEBUG
2172	dbg = fopen("/tmp/log", "w+");
2173#endif
2174
2175	sc = calloc(1, sizeof(struct pci_ahci_softc));
2176	pi->pi_arg = sc;
2177	sc->asc_pi = pi;
2178	sc->ports = MAX_PORTS;
2179
2180	/*
2181	 * Only use port 0 for a backing device. All other ports will be
2182	 * marked as unused
2183	 */
2184	sc->port[0].atapi = atapi;
2185
2186	/*
2187	 * Attempt to open the backing image. Use the PCI
2188	 * slot/func for the identifier string.
2189	 */
2190	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2191	bctxt = blockif_open(opts, bident);
2192	if (bctxt == NULL) {
2193		ret = 1;
2194		goto open_fail;
2195	}
2196	sc->port[0].bctx = bctxt;
2197	sc->port[0].pr_sc = sc;
2198
2199	/*
2200	 * Create an identifier for the backing file. Use parts of the
2201	 * md5 sum of the filename
2202	 */
2203	MD5Init(&mdctx);
2204	MD5Update(&mdctx, opts, strlen(opts));
2205	MD5Final(digest, &mdctx);
2206	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2207	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2208
2209	/*
2210	 * Allocate blockif request structures and add them
2211	 * to the free list
2212	 */
2213	pci_ahci_ioreq_init(&sc->port[0]);
2214
2215	pthread_mutex_init(&sc->mtx, NULL);
2216
2217	/* Intel ICH8 AHCI */
2218	slots = sc->port[0].ioqsz;
2219	if (slots > 32)
2220		slots = 32;
2221	--slots;
2222	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2223	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2224	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2225	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2226	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2227
2228	/* Only port 0 implemented */
2229	sc->pi = 1;
2230	sc->vs = 0x10300;
2231	sc->cap2 = AHCI_CAP2_APST;
2232	ahci_reset(sc);
2233
2234	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2235	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2236	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2237	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2238	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2239	pci_emul_add_msicap(pi, 1);
2240	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2241	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2242
2243	pci_lintr_request(pi);
2244
2245open_fail:
2246	if (ret) {
2247		blockif_close(sc->port[0].bctx);
2248		free(sc);
2249	}
2250
2251	return (ret);
2252}
2253
/*
 * Init hook for the "ahci-hd" emulation: set up the controller with a
 * disk (non-ATAPI) device on port 0.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2260
/*
 * Init hook for the "ahci-cd" emulation: set up the controller with an
 * ATAPI device on port 0.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2267
2268/*
2269 * Use separate emulation names to distinguish drive and atapi devices
2270 */
2271struct pci_devemu pci_de_ahci_hd = {
2272	.pe_emu =	"ahci-hd",
2273	.pe_init =	pci_ahci_hd_init,
2274	.pe_barwrite =	pci_ahci_write,
2275	.pe_barread =	pci_ahci_read
2276};
2277PCI_EMUL_SET(pci_de_ahci_hd);
2278
2279struct pci_devemu pci_de_ahci_cd = {
2280	.pe_emu =	"ahci-cd",
2281	.pe_init =	pci_ahci_atapi_init,
2282	.pe_barwrite =	pci_ahci_write,
2283	.pe_barread =	pci_ahci_read
2284};
2285PCI_EMUL_SET(pci_de_ahci_cd);
2286