pci_ahci.c revision 282307
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 282307 2015-05-01 17:30:59Z mav $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 282307 2015-05-01 17:30:59Z mav $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <pthread_np.h>
52#include <inttypes.h>
53#include <md5.h>
54
55#include "bhyverun.h"
56#include "pci_emul.h"
57#include "ahci.h"
58#include "block_if.h"
59
/* Upper bound on emulated ports (Intel ICH8 AHCI supports 6 ports). */
#define	MAX_PORTS	6

/* Values assigned to a port's 'sig' field, depending on the device type. */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
64
/*
 * SATA FIS type codes.  The value is the first byte of the corresponding
 * FIS; the trailing comment gives the direction of transfer.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS, host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS, device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS, device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS, bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS, bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS, bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS, device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set device bits FIS, device to host */
};
75
/*
 * SCSI opcodes (first byte of the ATAPI command packet).
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE
93
/*
 * SCSI mode page codes.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01	/* read/write error recovery */
#define	MODEPAGE_CD_CAPABILITIES	0x2A	/* CD capabilities */
99
/*
 * ATA commands.
 * NOTE(review): judging by the names these look like feature codes for
 * enabling/disabling SATA features, with ATA_SATA_SF_AN as a sub-value;
 * confirm against the users further down the file.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
106
/*
 * Debug printf.
 *
 * DPRINTF writes to (and flushes) the 'dbg' stream when AHCI_DEBUG is
 * defined and expands to nothing otherwise.  WPRINTF always prints to
 * stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define	DPRINTF(format, ...)						\
	do {								\
		fprintf(dbg, format, ##__VA_ARGS__);			\
		fflush(dbg);						\
	} while (0)
#else
#define	DPRINTF(format, ...)
#endif
#define	WPRINTF(format, ...)	printf(format, ##__VA_ARGS__)
117
118struct ahci_ioreq {
119	struct blockif_req io_req;
120	struct ahci_port *io_pr;
121	STAILQ_ENTRY(ahci_ioreq) io_flist;
122	TAILQ_ENTRY(ahci_ioreq) io_blist;
123	uint8_t *cfis;
124	uint32_t len;
125	uint32_t done;
126	int slot;
127	int more;
128};
129
/*
 * Per-port state of the emulated controller.
 *
 * NOTE(review): the block of uint32_t fields from 'clb' through 'fbs'
 * (including the 'unused0' placeholder) appears to mirror the layout of the
 * AHCI port registers and is presumably accessed by offset elsewhere in the
 * file -- do not reorder or resize these members.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* controller this port belongs to */
	uint8_t *cmd_lst;		/* host pointer to the command list */
	uint8_t *rfis;			/* host pointer to the received-FIS area */
	char ident[20 + 1];		/* string reported in IDENTIFY data */
	int atapi;			/* non-zero for an ATAPI device */
	int reset;
	int mult_sectors;		/* reported in IDENTIFY word 59 */
	uint8_t xfermode;		/* current transfer mode */
	uint8_t err_cfis[20];		/* saved when a command fails */
	uint8_t sense_key;		/* ATAPI sense key of the last error */
	uint8_t asc;			/* ATAPI additional sense code */
	uint32_t pending;		/* bitmask of slots with in-flight I/O */

	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy requests */
};
171
/*
 * One command-list entry, overlaid directly on the command list memory
 * (see the casts of 'cmd_lst + slot * AHCI_CL_SIZE').  The layout must not
 * change.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries */
	uint32_t prdbc;		/* byte count actually transferred */
	uint64_t ctba;
	uint32_t reserved[4];
};
179
/*
 * One physical region descriptor, overlaid on the table that follows the
 * command FIS at offset 0x80.  The layout must not change.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the region */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the region size in bytes */
};
186
187struct pci_ahci_softc {
188	struct pci_devinst *asc_pi;
189	pthread_mutex_t	mtx;
190	int ports;
191	uint32_t cap;
192	uint32_t ghc;
193	uint32_t is;
194	uint32_t pi;
195	uint32_t vs;
196	uint32_t ccc_ctl;
197	uint32_t ccc_pts;
198	uint32_t em_loc;
199	uint32_t em_ctl;
200	uint32_t cap2;
201	uint32_t bohc;
202	uint32_t lintr;
203	struct ahci_port port[MAX_PORTS];
204};
205#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
206
/*
 * Convert a logical block address into a 3-byte minute/second/frame triple
 * stored in buf[0..2].  The address is first offset by 150 frames, then
 * split using 75 frames per second and 60 seconds per minute.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames = lba + 150;
	int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
214
215/*
216 * generate HBA intr depending on whether or not ports within
217 * the controller have an interrupt pending.
218 */
219static void
220ahci_generate_intr(struct pci_ahci_softc *sc)
221{
222	struct pci_devinst *pi;
223	int i;
224
225	pi = sc->asc_pi;
226
227	for (i = 0; i < sc->ports; i++) {
228		struct ahci_port *pr;
229		pr = &sc->port[i];
230		if (pr->is & pr->ie)
231			sc->is |= (1 << i);
232	}
233
234	DPRINTF("%s %x\n", __func__, sc->is);
235
236	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
237		if (pci_msi_enabled(pi)) {
238			/*
239			 * Generate an MSI interrupt on every edge
240			 */
241			pci_generate_msi(pi, 0);
242		} else if (!sc->lintr) {
243			/*
244			 * Only generate a pin-based interrupt if one wasn't
245			 * in progress
246			 */
247			sc->lintr = 1;
248			pci_lintr_assert(pi);
249		}
250	} else if (sc->lintr) {
251		/*
252		 * No interrupts: deassert pin-based signal if it had
253		 * been asserted
254		 */
255		pci_lintr_deassert(pi);
256		sc->lintr = 0;
257	}
258}
259
260static void
261ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
262{
263	int offset, len, irq;
264
265	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
266		return;
267
268	switch (ft) {
269	case FIS_TYPE_REGD2H:
270		offset = 0x40;
271		len = 20;
272		irq = AHCI_P_IX_DHR;
273		break;
274	case FIS_TYPE_SETDEVBITS:
275		offset = 0x58;
276		len = 8;
277		irq = AHCI_P_IX_SDB;
278		break;
279	case FIS_TYPE_PIOSETUP:
280		offset = 0x20;
281		len = 20;
282		irq = 0;
283		break;
284	default:
285		WPRINTF("unsupported fis type %d\n", ft);
286		return;
287	}
288	memcpy(p->rfis + offset, fis, len);
289	if (irq) {
290		p->is |= irq;
291		ahci_generate_intr(p->pr_sc);
292	}
293}
294
295static void
296ahci_write_fis_piosetup(struct ahci_port *p)
297{
298	uint8_t fis[20];
299
300	memset(fis, 0, sizeof(fis));
301	fis[0] = FIS_TYPE_PIOSETUP;
302	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
303}
304
305static void
306ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
307{
308	uint8_t fis[8];
309	uint8_t error;
310
311	error = (tfd >> 8) & 0xff;
312	memset(fis, 0, sizeof(fis));
313	fis[0] = FIS_TYPE_SETDEVBITS;
314	fis[1] = (1 << 6);
315	fis[2] = tfd & 0x77;
316	fis[3] = error;
317	if (fis[2] & ATA_S_ERROR) {
318		p->is |= AHCI_P_IX_TFE;
319		p->err_cfis[0] = slot;
320		p->err_cfis[2] = tfd & 0x77;
321		p->err_cfis[3] = error;
322		memcpy(&p->err_cfis[4], cfis + 4, 16);
323	} else {
324		*(uint32_t *)(fis + 4) = (1 << slot);
325		p->sact &= ~(1 << slot);
326	}
327	p->tfd = tfd;
328	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
329}
330
331static void
332ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
333{
334	uint8_t fis[20];
335	uint8_t error;
336
337	error = (tfd >> 8) & 0xff;
338	memset(fis, 0, sizeof(fis));
339	fis[0] = FIS_TYPE_REGD2H;
340	fis[1] = (1 << 6);
341	fis[2] = tfd & 0xff;
342	fis[3] = error;
343	fis[4] = cfis[4];
344	fis[5] = cfis[5];
345	fis[6] = cfis[6];
346	fis[7] = cfis[7];
347	fis[8] = cfis[8];
348	fis[9] = cfis[9];
349	fis[10] = cfis[10];
350	fis[11] = cfis[11];
351	fis[12] = cfis[12];
352	fis[13] = cfis[13];
353	if (fis[2] & ATA_S_ERROR) {
354		p->is |= AHCI_P_IX_TFE;
355		p->err_cfis[0] = 0x80;
356		p->err_cfis[2] = tfd & 0xff;
357		p->err_cfis[3] = error;
358		memcpy(&p->err_cfis[4], cfis + 4, 16);
359	} else
360		p->ci &= ~(1 << slot);
361	p->tfd = tfd;
362	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363}
364
365static void
366ahci_write_reset_fis_d2h(struct ahci_port *p)
367{
368	uint8_t fis[20];
369
370	memset(fis, 0, sizeof(fis));
371	fis[0] = FIS_TYPE_REGD2H;
372	fis[3] = 1;
373	fis[4] = 1;
374	if (p->atapi) {
375		fis[5] = 0x14;
376		fis[6] = 0xeb;
377	}
378	fis[12] = 1;
379	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
380}
381
382static void
383ahci_check_stopped(struct ahci_port *p)
384{
385	/*
386	 * If we are no longer processing the command list and nothing
387	 * is in-flight, clear the running bit, the current command
388	 * slot, the command issue and active bits.
389	 */
390	if (!(p->cmd & AHCI_P_CMD_ST)) {
391		if (p->pending == 0) {
392			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
393			p->ci = 0;
394			p->sact = 0;
395		}
396	}
397}
398
399static void
400ahci_port_stop(struct ahci_port *p)
401{
402	struct ahci_ioreq *aior;
403	uint8_t *cfis;
404	int slot;
405	int ncq;
406	int error;
407
408	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
409
410	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
411		/*
412		 * Try to cancel the outstanding blockif request.
413		 */
414		error = blockif_cancel(p->bctx, &aior->io_req);
415		if (error != 0)
416			continue;
417
418		slot = aior->slot;
419		cfis = aior->cfis;
420		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
421		    cfis[2] == ATA_READ_FPDMA_QUEUED)
422			ncq = 1;
423
424		if (ncq)
425			p->sact &= ~(1 << slot);
426		else
427			p->ci &= ~(1 << slot);
428
429		/*
430		 * This command is now done.
431		 */
432		p->pending &= ~(1 << slot);
433
434		/*
435		 * Delete the blockif request from the busy list
436		 */
437		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
438
439		/*
440		 * Move the blockif request back to the free list
441		 */
442		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
443	}
444
445	ahci_check_stopped(p);
446}
447
448static void
449ahci_port_reset(struct ahci_port *pr)
450{
451	pr->serr = 0;
452	pr->sact = 0;
453	pr->xfermode = ATA_UDMA6;
454	pr->mult_sectors = 128;
455
456	if (!pr->bctx) {
457		pr->ssts = ATA_SS_DET_NO_DEVICE;
458		pr->sig = 0xFFFFFFFF;
459		pr->tfd = 0x7F;
460		return;
461	}
462	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
463	if (pr->sctl & ATA_SC_SPD_MASK)
464		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
465	else
466		pr->ssts |= ATA_SS_SPD_GEN3;
467	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
468	if (!pr->atapi) {
469		pr->sig = PxSIG_ATA;
470		pr->tfd |= ATA_S_READY;
471	} else
472		pr->sig = PxSIG_ATAPI;
473	ahci_write_reset_fis_d2h(pr);
474}
475
476static void
477ahci_reset(struct pci_ahci_softc *sc)
478{
479	int i;
480
481	sc->ghc = AHCI_GHC_AE;
482	sc->is = 0;
483
484	if (sc->lintr) {
485		pci_lintr_deassert(sc->asc_pi);
486		sc->lintr = 0;
487	}
488
489	for (i = 0; i < sc->ports; i++) {
490		sc->port[i].ie = 0;
491		sc->port[i].is = 0;
492		sc->port[i].sctl = 0;
493		ahci_port_reset(&sc->port[i]);
494	}
495}
496
/*
 * Copy 'src' into the 'len'-byte field 'dest' in ATA string format: the two
 * bytes of every 16-bit word are swapped (index i is stored at i ^ 1) and
 * the field is padded with spaces once 'src' is exhausted.  No terminating
 * NUL is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
509
/*
 * Copy 'src' into the 'len'-byte field 'dest' in natural byte order,
 * padding with spaces once 'src' is exhausted.  No terminating NUL is
 * written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *const end = dest + (len > 0 ? len : 0);

	while (out < end)
		*out++ = (*src != '\0') ? *src++ : ' ';
}
522
523/*
524 * Build up the iovec based on the PRDT, 'done' and 'len'.
525 */
526static void
527ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
528    struct ahci_prdt_entry *prdt, uint16_t prdtl)
529{
530	struct blockif_req *breq = &aior->io_req;
531	int i, j, skip, todo, left, extra;
532	uint32_t dbcsz;
533
534	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
535	skip = aior->done;
536	left = aior->len - aior->done;
537	todo = 0;
538	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
539	    i++, prdt++) {
540		dbcsz = (prdt->dbc & DBCMASK) + 1;
541		/* Skip already done part of the PRDT */
542		if (dbcsz <= skip) {
543			skip -= dbcsz;
544			continue;
545		}
546		dbcsz -= skip;
547		if (dbcsz > left)
548			dbcsz = left;
549		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
550		    prdt->dba + skip, dbcsz);
551		breq->br_iov[j].iov_len = dbcsz;
552		todo += dbcsz;
553		left -= dbcsz;
554		skip = 0;
555		j++;
556	}
557
558	/* If we got limited by IOV length, round I/O down to sector size. */
559	if (j == BLOCKIF_IOV_MAX) {
560		extra = todo % blockif_sectsz(p->bctx);
561		todo -= extra;
562		assert(todo > 0);
563		while (extra > 0) {
564			if (breq->br_iov[j - 1].iov_len > extra) {
565				breq->br_iov[j - 1].iov_len -= extra;
566				break;
567			}
568			extra -= breq->br_iov[j - 1].iov_len;
569			j--;
570		}
571	}
572
573	breq->br_iovcnt = j;
574	breq->br_resid = todo;
575	aior->done += todo;
576	aior->more = (aior->done < aior->len && i < prdtl);
577}
578
579static void
580ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
581{
582	struct ahci_ioreq *aior;
583	struct blockif_req *breq;
584	struct ahci_prdt_entry *prdt;
585	struct ahci_cmd_hdr *hdr;
586	uint64_t lba;
587	uint32_t len;
588	int err, ncq, readop;
589
590	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
591	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
592	ncq = 0;
593	readop = 1;
594
595	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
596	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
597	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
598	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
599		readop = 0;
600
601	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
602	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
603		lba = ((uint64_t)cfis[10] << 40) |
604			((uint64_t)cfis[9] << 32) |
605			((uint64_t)cfis[8] << 24) |
606			((uint64_t)cfis[6] << 16) |
607			((uint64_t)cfis[5] << 8) |
608			cfis[4];
609		len = cfis[11] << 8 | cfis[3];
610		if (!len)
611			len = 65536;
612		ncq = 1;
613	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
614	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
615	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
616		lba = ((uint64_t)cfis[10] << 40) |
617			((uint64_t)cfis[9] << 32) |
618			((uint64_t)cfis[8] << 24) |
619			((uint64_t)cfis[6] << 16) |
620			((uint64_t)cfis[5] << 8) |
621			cfis[4];
622		len = cfis[13] << 8 | cfis[12];
623		if (!len)
624			len = 65536;
625	} else {
626		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
627			(cfis[5] << 8) | cfis[4];
628		len = cfis[12];
629		if (!len)
630			len = 256;
631	}
632	lba *= blockif_sectsz(p->bctx);
633	len *= blockif_sectsz(p->bctx);
634
635	/* Pull request off free list */
636	aior = STAILQ_FIRST(&p->iofhd);
637	assert(aior != NULL);
638	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
639
640	aior->cfis = cfis;
641	aior->slot = slot;
642	aior->len = len;
643	aior->done = done;
644	breq = &aior->io_req;
645	breq->br_offset = lba + done;
646	ahci_build_iov(p, aior, prdt, hdr->prdtl);
647
648	/* Mark this command in-flight. */
649	p->pending |= 1 << slot;
650
651	/* Stuff request onto busy list. */
652	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
653
654	if (readop)
655		err = blockif_read(p->bctx, breq);
656	else
657		err = blockif_write(p->bctx, breq);
658	assert(err == 0);
659
660	if (ncq)
661		p->ci &= ~(1 << slot);
662}
663
664static void
665ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
666{
667	struct ahci_ioreq *aior;
668	struct blockif_req *breq;
669	int err;
670
671	/*
672	 * Pull request off free list
673	 */
674	aior = STAILQ_FIRST(&p->iofhd);
675	assert(aior != NULL);
676	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
677	aior->cfis = cfis;
678	aior->slot = slot;
679	aior->len = 0;
680	aior->done = 0;
681	aior->more = 0;
682	breq = &aior->io_req;
683
684	/*
685	 * Mark this command in-flight.
686	 */
687	p->pending |= 1 << slot;
688
689	/*
690	 * Stuff request onto busy list
691	 */
692	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
693
694	err = blockif_flush(p->bctx, breq);
695	assert(err == 0);
696}
697
698static inline void
699read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
700		void *buf, int size)
701{
702	struct ahci_cmd_hdr *hdr;
703	struct ahci_prdt_entry *prdt;
704	void *to;
705	int i, len;
706
707	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
708	len = size;
709	to = buf;
710	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
711	for (i = 0; i < hdr->prdtl && len; i++) {
712		uint8_t *ptr;
713		uint32_t dbcsz;
714		int sublen;
715
716		dbcsz = (prdt->dbc & DBCMASK) + 1;
717		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
718		sublen = len < dbcsz ? len : dbcsz;
719		memcpy(to, ptr, sublen);
720		len -= sublen;
721		to += sublen;
722		prdt++;
723	}
724}
725
726static void
727ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
728{
729	struct ahci_ioreq *aior;
730	struct blockif_req *breq;
731	uint8_t *entry;
732	uint64_t elba;
733	uint32_t len, elen;
734	int err;
735	uint8_t buf[512];
736
737	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
738		len = (uint16_t)cfis[13] << 8 | cfis[12];
739		len *= 512;
740	} else { /* ATA_SEND_FPDMA_QUEUED */
741		len = (uint16_t)cfis[11] << 8 | cfis[3];
742		len *= 512;
743	}
744	read_prdt(p, slot, cfis, buf, sizeof(buf));
745
746next:
747	entry = &buf[done];
748	elba = ((uint64_t)entry[5] << 40) |
749		((uint64_t)entry[4] << 32) |
750		((uint64_t)entry[3] << 24) |
751		((uint64_t)entry[2] << 16) |
752		((uint64_t)entry[1] << 8) |
753		entry[0];
754	elen = (uint16_t)entry[7] << 8 | entry[6];
755	done += 8;
756	if (elen == 0) {
757		if (done >= len) {
758			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
759			p->pending &= ~(1 << slot);
760			ahci_check_stopped(p);
761			return;
762		}
763		goto next;
764	}
765
766	/*
767	 * Pull request off free list
768	 */
769	aior = STAILQ_FIRST(&p->iofhd);
770	assert(aior != NULL);
771	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
772	aior->cfis = cfis;
773	aior->slot = slot;
774	aior->len = len;
775	aior->done = done;
776	aior->more = (len != done);
777
778	breq = &aior->io_req;
779	breq->br_offset = elba * blockif_sectsz(p->bctx);
780	breq->br_resid = elen * blockif_sectsz(p->bctx);
781
782	/*
783	 * Mark this command in-flight.
784	 */
785	p->pending |= 1 << slot;
786
787	/*
788	 * Stuff request onto busy list
789	 */
790	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
791
792	err = blockif_delete(p->bctx, breq);
793	assert(err == 0);
794}
795
796static inline void
797write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
798		void *buf, int size)
799{
800	struct ahci_cmd_hdr *hdr;
801	struct ahci_prdt_entry *prdt;
802	void *from;
803	int i, len;
804
805	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
806	len = size;
807	from = buf;
808	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
809	for (i = 0; i < hdr->prdtl && len; i++) {
810		uint8_t *ptr;
811		uint32_t dbcsz;
812		int sublen;
813
814		dbcsz = (prdt->dbc & DBCMASK) + 1;
815		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
816		sublen = len < dbcsz ? len : dbcsz;
817		memcpy(ptr, from, sublen);
818		len -= sublen;
819		from += sublen;
820		prdt++;
821	}
822	hdr->prdbc = size - len;
823}
824
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of all
 * 'size' bytes is zero: the last byte becomes the two's complement of the
 * sum of the preceding size - 1 bytes.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	const int last = size - 1;
	uint8_t total = 0;
	int idx = 0;

	while (idx < last)
		total += buf[idx++];
	buf[last] = (uint8_t)(0 - total);
}
835
836static void
837ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
838{
839	struct ahci_cmd_hdr *hdr;
840	uint8_t buf[512];
841
842	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
843	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
844	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
845		ahci_write_fis_d2h(p, slot, cfis,
846		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
847		return;
848	}
849
850	memset(buf, 0, sizeof(buf));
851	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
852	ahci_checksum(buf, sizeof(buf));
853
854	if (cfis[2] == ATA_READ_LOG_EXT)
855		ahci_write_fis_piosetup(p);
856	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
857	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
858}
859
860static void
861handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
862{
863	struct ahci_cmd_hdr *hdr;
864
865	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
866	if (p->atapi || hdr->prdtl == 0) {
867		ahci_write_fis_d2h(p, slot, cfis,
868		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
869	} else {
870		uint16_t buf[256];
871		uint64_t sectors;
872		int sectsz, psectsz, psectoff, candelete, ro;
873		uint16_t cyl;
874		uint8_t sech, heads;
875
876		ro = blockif_is_ro(p->bctx);
877		candelete = blockif_candelete(p->bctx);
878		sectsz = blockif_sectsz(p->bctx);
879		sectors = blockif_size(p->bctx) / sectsz;
880		blockif_chs(p->bctx, &cyl, &heads, &sech);
881		blockif_psectsz(p->bctx, &psectsz, &psectoff);
882		memset(buf, 0, sizeof(buf));
883		buf[0] = 0x0040;
884		buf[1] = cyl;
885		buf[3] = heads;
886		buf[6] = sech;
887		ata_string((uint8_t *)(buf+10), p->ident, 20);
888		ata_string((uint8_t *)(buf+23), "001", 8);
889		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
890		buf[47] = (0x8000 | 128);
891		buf[48] = 0x1;
892		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
893		buf[50] = (1 << 14);
894		buf[53] = (1 << 1 | 1 << 2);
895		if (p->mult_sectors)
896			buf[59] = (0x100 | p->mult_sectors);
897		if (sectors <= 0x0fffffff) {
898			buf[60] = sectors;
899			buf[61] = (sectors >> 16);
900		} else {
901			buf[60] = 0xffff;
902			buf[61] = 0x0fff;
903		}
904		buf[63] = 0x7;
905		if (p->xfermode & ATA_WDMA0)
906			buf[63] |= (1 << ((p->xfermode & 7) + 8));
907		buf[64] = 0x3;
908		buf[65] = 120;
909		buf[66] = 120;
910		buf[67] = 120;
911		buf[68] = 120;
912		buf[69] = 0;
913		buf[75] = 31;
914		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
915			   ATA_SUPPORT_NCQ);
916		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
917			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
918		buf[80] = 0x3f0;
919		buf[81] = 0x28;
920		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
921			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
922		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
923			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
924		buf[84] = (1 << 14);
925		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
926			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
927		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
928			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
929		buf[87] = (1 << 14);
930		buf[88] = 0x7f;
931		if (p->xfermode & ATA_UDMA0)
932			buf[88] |= (1 << ((p->xfermode & 7) + 8));
933		buf[93] = (1 | 1 <<14);
934		buf[100] = sectors;
935		buf[101] = (sectors >> 16);
936		buf[102] = (sectors >> 32);
937		buf[103] = (sectors >> 48);
938		if (candelete && !ro) {
939			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
940			buf[105] = 1;
941			buf[169] = ATA_SUPPORT_DSM_TRIM;
942		}
943		buf[106] = 0x4000;
944		buf[209] = 0x4000;
945		if (psectsz > sectsz) {
946			buf[106] |= 0x2000;
947			buf[106] |= ffsl(psectsz / sectsz) - 1;
948			buf[209] |= (psectoff / sectsz);
949		}
950		if (sectsz > 512) {
951			buf[106] |= 0x1000;
952			buf[117] = sectsz / 2;
953			buf[118] = ((sectsz / 2) >> 16);
954		}
955		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
956		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
957		buf[222] = 0x1020;
958		buf[255] = 0x00a5;
959		ahci_checksum((uint8_t *)buf, sizeof(buf));
960		ahci_write_fis_piosetup(p);
961		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
962		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
963	}
964}
965
966static void
967handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
968{
969	if (!p->atapi) {
970		ahci_write_fis_d2h(p, slot, cfis,
971		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
972	} else {
973		uint16_t buf[256];
974
975		memset(buf, 0, sizeof(buf));
976		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
977		ata_string((uint8_t *)(buf+10), p->ident, 20);
978		ata_string((uint8_t *)(buf+23), "001", 8);
979		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
980		buf[49] = (1 << 9 | 1 << 8);
981		buf[50] = (1 << 14 | 1);
982		buf[53] = (1 << 2 | 1 << 1);
983		buf[62] = 0x3f;
984		buf[63] = 7;
985		if (p->xfermode & ATA_WDMA0)
986			buf[63] |= (1 << ((p->xfermode & 7) + 8));
987		buf[64] = 3;
988		buf[65] = 120;
989		buf[66] = 120;
990		buf[67] = 120;
991		buf[68] = 120;
992		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
993		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
994		buf[78] = (1 << 5);
995		buf[80] = 0x3f0;
996		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
997			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
998		buf[83] = (1 << 14);
999		buf[84] = (1 << 14);
1000		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1001			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1002		buf[87] = (1 << 14);
1003		buf[88] = 0x7f;
1004		if (p->xfermode & ATA_UDMA0)
1005			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1006		buf[222] = 0x1020;
1007		buf[255] = 0x00a5;
1008		ahci_checksum((uint8_t *)buf, sizeof(buf));
1009		ahci_write_fis_piosetup(p);
1010		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1011		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1012	}
1013}
1014
1015static void
1016atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1017{
1018	uint8_t buf[36];
1019	uint8_t *acmd;
1020	int len;
1021	uint32_t tfd;
1022
1023	acmd = cfis + 0x40;
1024
1025	if (acmd[1] & 1) {		/* VPD */
1026		if (acmd[2] == 0) {	/* Supported VPD pages */
1027			buf[0] = 0x05;
1028			buf[1] = 0;
1029			buf[2] = 0;
1030			buf[3] = 1;
1031			buf[4] = 0;
1032			len = 4 + buf[3];
1033		} else {
1034			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1035			p->asc = 0x24;
1036			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1037			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1038			ahci_write_fis_d2h(p, slot, cfis, tfd);
1039			return;
1040		}
1041	} else {
1042		buf[0] = 0x05;
1043		buf[1] = 0x80;
1044		buf[2] = 0x00;
1045		buf[3] = 0x21;
1046		buf[4] = 31;
1047		buf[5] = 0;
1048		buf[6] = 0;
1049		buf[7] = 0;
1050		atapi_string(buf + 8, "BHYVE", 8);
1051		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1052		atapi_string(buf + 32, "001", 4);
1053		len = sizeof(buf);
1054	}
1055
1056	if (len > acmd[4])
1057		len = acmd[4];
1058	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1059	write_prdt(p, slot, cfis, buf, len);
1060	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1061}
1062
1063static void
1064atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1065{
1066	uint8_t buf[8];
1067	uint64_t sectors;
1068
1069	sectors = blockif_size(p->bctx) / 2048;
1070	be32enc(buf, sectors - 1);
1071	be32enc(buf + 4, 2048);
1072	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1073	write_prdt(p, slot, cfis, buf, sizeof(buf));
1074	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1075}
1076
1077static void
1078atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1079{
1080	uint8_t *acmd;
1081	uint8_t format;
1082	int len;
1083
1084	acmd = cfis + 0x40;
1085
1086	len = be16dec(acmd + 7);
1087	format = acmd[9] >> 6;
1088	switch (format) {
1089	case 0:
1090	{
1091		int msf, size;
1092		uint64_t sectors;
1093		uint8_t start_track, buf[20], *bp;
1094
1095		msf = (acmd[1] >> 1) & 1;
1096		start_track = acmd[6];
1097		if (start_track > 1 && start_track != 0xaa) {
1098			uint32_t tfd;
1099			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1100			p->asc = 0x24;
1101			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1102			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1103			ahci_write_fis_d2h(p, slot, cfis, tfd);
1104			return;
1105		}
1106		bp = buf + 2;
1107		*bp++ = 1;
1108		*bp++ = 1;
1109		if (start_track <= 1) {
1110			*bp++ = 0;
1111			*bp++ = 0x14;
1112			*bp++ = 1;
1113			*bp++ = 0;
1114			if (msf) {
1115				*bp++ = 0;
1116				lba_to_msf(bp, 0);
1117				bp += 3;
1118			} else {
1119				*bp++ = 0;
1120				*bp++ = 0;
1121				*bp++ = 0;
1122				*bp++ = 0;
1123			}
1124		}
1125		*bp++ = 0;
1126		*bp++ = 0x14;
1127		*bp++ = 0xaa;
1128		*bp++ = 0;
1129		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1130		sectors >>= 2;
1131		if (msf) {
1132			*bp++ = 0;
1133			lba_to_msf(bp, sectors);
1134			bp += 3;
1135		} else {
1136			be32enc(bp, sectors);
1137			bp += 4;
1138		}
1139		size = bp - buf;
1140		be16enc(buf, size - 2);
1141		if (len > size)
1142			len = size;
1143		write_prdt(p, slot, cfis, buf, len);
1144		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1145		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1146		break;
1147	}
1148	case 1:
1149	{
1150		uint8_t buf[12];
1151
1152		memset(buf, 0, sizeof(buf));
1153		buf[1] = 0xa;
1154		buf[2] = 0x1;
1155		buf[3] = 0x1;
1156		if (len > sizeof(buf))
1157			len = sizeof(buf);
1158		write_prdt(p, slot, cfis, buf, len);
1159		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1160		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1161		break;
1162	}
1163	case 2:
1164	{
1165		int msf, size;
1166		uint64_t sectors;
1167		uint8_t start_track, *bp, buf[50];
1168
1169		msf = (acmd[1] >> 1) & 1;
1170		start_track = acmd[6];
1171		bp = buf + 2;
1172		*bp++ = 1;
1173		*bp++ = 1;
1174
1175		*bp++ = 1;
1176		*bp++ = 0x14;
1177		*bp++ = 0;
1178		*bp++ = 0xa0;
1179		*bp++ = 0;
1180		*bp++ = 0;
1181		*bp++ = 0;
1182		*bp++ = 0;
1183		*bp++ = 1;
1184		*bp++ = 0;
1185		*bp++ = 0;
1186
1187		*bp++ = 1;
1188		*bp++ = 0x14;
1189		*bp++ = 0;
1190		*bp++ = 0xa1;
1191		*bp++ = 0;
1192		*bp++ = 0;
1193		*bp++ = 0;
1194		*bp++ = 0;
1195		*bp++ = 1;
1196		*bp++ = 0;
1197		*bp++ = 0;
1198
1199		*bp++ = 1;
1200		*bp++ = 0x14;
1201		*bp++ = 0;
1202		*bp++ = 0xa2;
1203		*bp++ = 0;
1204		*bp++ = 0;
1205		*bp++ = 0;
1206		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1207		sectors >>= 2;
1208		if (msf) {
1209			*bp++ = 0;
1210			lba_to_msf(bp, sectors);
1211			bp += 3;
1212		} else {
1213			be32enc(bp, sectors);
1214			bp += 4;
1215		}
1216
1217		*bp++ = 1;
1218		*bp++ = 0x14;
1219		*bp++ = 0;
1220		*bp++ = 1;
1221		*bp++ = 0;
1222		*bp++ = 0;
1223		*bp++ = 0;
1224		if (msf) {
1225			*bp++ = 0;
1226			lba_to_msf(bp, 0);
1227			bp += 3;
1228		} else {
1229			*bp++ = 0;
1230			*bp++ = 0;
1231			*bp++ = 0;
1232			*bp++ = 0;
1233		}
1234
1235		size = bp - buf;
1236		be16enc(buf, size - 2);
1237		if (len > size)
1238			len = size;
1239		write_prdt(p, slot, cfis, buf, len);
1240		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1241		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1242		break;
1243	}
1244	default:
1245	{
1246		uint32_t tfd;
1247
1248		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1249		p->asc = 0x24;
1250		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1251		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1252		ahci_write_fis_d2h(p, slot, cfis, tfd);
1253		break;
1254	}
1255	}
1256}
1257
1258static void
1259atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1260{
1261	uint8_t buf[16];
1262
1263	memset(buf, 0, sizeof(buf));
1264	buf[3] = 8;
1265
1266	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1267	write_prdt(p, slot, cfis, buf, sizeof(buf));
1268	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1269}
1270
1271static void
1272atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1273{
1274	struct ahci_ioreq *aior;
1275	struct ahci_cmd_hdr *hdr;
1276	struct ahci_prdt_entry *prdt;
1277	struct blockif_req *breq;
1278	struct pci_ahci_softc *sc;
1279	uint8_t *acmd;
1280	uint64_t lba;
1281	uint32_t len;
1282	int err;
1283
1284	sc = p->pr_sc;
1285	acmd = cfis + 0x40;
1286	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1287	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1288
1289	lba = be32dec(acmd + 2);
1290	if (acmd[0] == READ_10)
1291		len = be16dec(acmd + 7);
1292	else
1293		len = be32dec(acmd + 6);
1294	if (len == 0) {
1295		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1296		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1297	}
1298	lba *= 2048;
1299	len *= 2048;
1300
1301	/*
1302	 * Pull request off free list
1303	 */
1304	aior = STAILQ_FIRST(&p->iofhd);
1305	assert(aior != NULL);
1306	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1307	aior->cfis = cfis;
1308	aior->slot = slot;
1309	aior->len = len;
1310	aior->done = done;
1311	breq = &aior->io_req;
1312	breq->br_offset = lba + done;
1313	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1314
1315	/* Mark this command in-flight. */
1316	p->pending |= 1 << slot;
1317
1318	/* Stuff request onto busy list. */
1319	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1320
1321	err = blockif_read(p->bctx, breq);
1322	assert(err == 0);
1323}
1324
1325static void
1326atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1327{
1328	uint8_t buf[64];
1329	uint8_t *acmd;
1330	int len;
1331
1332	acmd = cfis + 0x40;
1333	len = acmd[4];
1334	if (len > sizeof(buf))
1335		len = sizeof(buf);
1336	memset(buf, 0, len);
1337	buf[0] = 0x70 | (1 << 7);
1338	buf[2] = p->sense_key;
1339	buf[7] = 10;
1340	buf[12] = p->asc;
1341	write_prdt(p, slot, cfis, buf, len);
1342	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1343	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1344}
1345
1346static void
1347atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1348{
1349	uint8_t *acmd = cfis + 0x40;
1350	uint32_t tfd;
1351
1352	switch (acmd[4] & 3) {
1353	case 0:
1354	case 1:
1355	case 3:
1356		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1357		tfd = ATA_S_READY | ATA_S_DSC;
1358		break;
1359	case 2:
1360		/* TODO eject media */
1361		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1362		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1363		p->asc = 0x53;
1364		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1365		break;
1366	}
1367	ahci_write_fis_d2h(p, slot, cfis, tfd);
1368}
1369
1370static void
1371atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1372{
1373	uint8_t *acmd;
1374	uint32_t tfd;
1375	uint8_t pc, code;
1376	int len;
1377
1378	acmd = cfis + 0x40;
1379	len = be16dec(acmd + 7);
1380	pc = acmd[2] >> 6;
1381	code = acmd[2] & 0x3f;
1382
1383	switch (pc) {
1384	case 0:
1385		switch (code) {
1386		case MODEPAGE_RW_ERROR_RECOVERY:
1387		{
1388			uint8_t buf[16];
1389
1390			if (len > sizeof(buf))
1391				len = sizeof(buf);
1392
1393			memset(buf, 0, sizeof(buf));
1394			be16enc(buf, 16 - 2);
1395			buf[2] = 0x70;
1396			buf[8] = 0x01;
1397			buf[9] = 16 - 10;
1398			buf[11] = 0x05;
1399			write_prdt(p, slot, cfis, buf, len);
1400			tfd = ATA_S_READY | ATA_S_DSC;
1401			break;
1402		}
1403		case MODEPAGE_CD_CAPABILITIES:
1404		{
1405			uint8_t buf[30];
1406
1407			if (len > sizeof(buf))
1408				len = sizeof(buf);
1409
1410			memset(buf, 0, sizeof(buf));
1411			be16enc(buf, 30 - 2);
1412			buf[2] = 0x70;
1413			buf[8] = 0x2A;
1414			buf[9] = 30 - 10;
1415			buf[10] = 0x08;
1416			buf[12] = 0x71;
1417			be16enc(&buf[18], 2);
1418			be16enc(&buf[20], 512);
1419			write_prdt(p, slot, cfis, buf, len);
1420			tfd = ATA_S_READY | ATA_S_DSC;
1421			break;
1422		}
1423		default:
1424			goto error;
1425			break;
1426		}
1427		break;
1428	case 3:
1429		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1430		p->asc = 0x39;
1431		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1432		break;
1433error:
1434	case 1:
1435	case 2:
1436		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1437		p->asc = 0x24;
1438		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1439		break;
1440	}
1441	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1442	ahci_write_fis_d2h(p, slot, cfis, tfd);
1443}
1444
1445static void
1446atapi_get_event_status_notification(struct ahci_port *p, int slot,
1447    uint8_t *cfis)
1448{
1449	uint8_t *acmd;
1450	uint32_t tfd;
1451
1452	acmd = cfis + 0x40;
1453
1454	/* we don't support asynchronous operation */
1455	if (!(acmd[1] & 1)) {
1456		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1457		p->asc = 0x24;
1458		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1459	} else {
1460		uint8_t buf[8];
1461		int len;
1462
1463		len = be16dec(acmd + 7);
1464		if (len > sizeof(buf))
1465			len = sizeof(buf);
1466
1467		memset(buf, 0, sizeof(buf));
1468		be16enc(buf, 8 - 2);
1469		buf[2] = 0x04;
1470		buf[3] = 0x10;
1471		buf[5] = 0x02;
1472		write_prdt(p, slot, cfis, buf, len);
1473		tfd = ATA_S_READY | ATA_S_DSC;
1474	}
1475	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1476	ahci_write_fis_d2h(p, slot, cfis, tfd);
1477}
1478
1479static void
1480handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1481{
1482	uint8_t *acmd;
1483
1484	acmd = cfis + 0x40;
1485
1486#ifdef AHCI_DEBUG
1487	{
1488		int i;
1489		DPRINTF("ACMD:");
1490		for (i = 0; i < 16; i++)
1491			DPRINTF("%02x ", acmd[i]);
1492		DPRINTF("\n");
1493	}
1494#endif
1495
1496	switch (acmd[0]) {
1497	case TEST_UNIT_READY:
1498		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1499		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1500		break;
1501	case INQUIRY:
1502		atapi_inquiry(p, slot, cfis);
1503		break;
1504	case READ_CAPACITY:
1505		atapi_read_capacity(p, slot, cfis);
1506		break;
1507	case PREVENT_ALLOW:
1508		/* TODO */
1509		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1510		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1511		break;
1512	case READ_TOC:
1513		atapi_read_toc(p, slot, cfis);
1514		break;
1515	case REPORT_LUNS:
1516		atapi_report_luns(p, slot, cfis);
1517		break;
1518	case READ_10:
1519	case READ_12:
1520		atapi_read(p, slot, cfis, 0);
1521		break;
1522	case REQUEST_SENSE:
1523		atapi_request_sense(p, slot, cfis);
1524		break;
1525	case START_STOP_UNIT:
1526		atapi_start_stop_unit(p, slot, cfis);
1527		break;
1528	case MODE_SENSE_10:
1529		atapi_mode_sense(p, slot, cfis);
1530		break;
1531	case GET_EVENT_STATUS_NOTIFICATION:
1532		atapi_get_event_status_notification(p, slot, cfis);
1533		break;
1534	default:
1535		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1536		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1537		p->asc = 0x20;
1538		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1539				ATA_S_READY | ATA_S_ERROR);
1540		break;
1541	}
1542}
1543
1544static void
1545ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1546{
1547
1548	switch (cfis[2]) {
1549	case ATA_ATA_IDENTIFY:
1550		handle_identify(p, slot, cfis);
1551		break;
1552	case ATA_SETFEATURES:
1553	{
1554		switch (cfis[3]) {
1555		case ATA_SF_ENAB_SATA_SF:
1556			switch (cfis[12]) {
1557			case ATA_SATA_SF_AN:
1558				p->tfd = ATA_S_DSC | ATA_S_READY;
1559				break;
1560			default:
1561				p->tfd = ATA_S_ERROR | ATA_S_READY;
1562				p->tfd |= (ATA_ERROR_ABORT << 8);
1563				break;
1564			}
1565			break;
1566		case ATA_SF_ENAB_WCACHE:
1567		case ATA_SF_DIS_WCACHE:
1568		case ATA_SF_ENAB_RCACHE:
1569		case ATA_SF_DIS_RCACHE:
1570			p->tfd = ATA_S_DSC | ATA_S_READY;
1571			break;
1572		case ATA_SF_SETXFER:
1573		{
1574			switch (cfis[12] & 0xf8) {
1575			case ATA_PIO:
1576			case ATA_PIO0:
1577				break;
1578			case ATA_WDMA0:
1579			case ATA_UDMA0:
1580				p->xfermode = (cfis[12] & 0x7);
1581				break;
1582			}
1583			p->tfd = ATA_S_DSC | ATA_S_READY;
1584			break;
1585		}
1586		default:
1587			p->tfd = ATA_S_ERROR | ATA_S_READY;
1588			p->tfd |= (ATA_ERROR_ABORT << 8);
1589			break;
1590		}
1591		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1592		break;
1593	}
1594	case ATA_SET_MULTI:
1595		if (cfis[12] != 0 &&
1596			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1597			p->tfd = ATA_S_ERROR | ATA_S_READY;
1598			p->tfd |= (ATA_ERROR_ABORT << 8);
1599		} else {
1600			p->mult_sectors = cfis[12];
1601			p->tfd = ATA_S_DSC | ATA_S_READY;
1602		}
1603		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1604		break;
1605	case ATA_READ:
1606	case ATA_WRITE:
1607	case ATA_READ48:
1608	case ATA_WRITE48:
1609	case ATA_READ_MUL:
1610	case ATA_WRITE_MUL:
1611	case ATA_READ_MUL48:
1612	case ATA_WRITE_MUL48:
1613	case ATA_READ_DMA:
1614	case ATA_WRITE_DMA:
1615	case ATA_READ_DMA48:
1616	case ATA_WRITE_DMA48:
1617	case ATA_READ_FPDMA_QUEUED:
1618	case ATA_WRITE_FPDMA_QUEUED:
1619		ahci_handle_rw(p, slot, cfis, 0);
1620		break;
1621	case ATA_FLUSHCACHE:
1622	case ATA_FLUSHCACHE48:
1623		ahci_handle_flush(p, slot, cfis);
1624		break;
1625	case ATA_DATA_SET_MANAGEMENT:
1626		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1627		    cfis[13] == 0 && cfis[12] == 1) {
1628			ahci_handle_dsm_trim(p, slot, cfis, 0);
1629			break;
1630		}
1631		ahci_write_fis_d2h(p, slot, cfis,
1632		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1633		break;
1634	case ATA_SEND_FPDMA_QUEUED:
1635		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1636		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1637		    cfis[11] == 0 && cfis[13] == 1) {
1638			ahci_handle_dsm_trim(p, slot, cfis, 0);
1639			break;
1640		}
1641		ahci_write_fis_d2h(p, slot, cfis,
1642		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1643		break;
1644	case ATA_READ_LOG_EXT:
1645	case ATA_READ_LOG_DMA_EXT:
1646		ahci_handle_read_log(p, slot, cfis);
1647		break;
1648	case ATA_NOP:
1649		ahci_write_fis_d2h(p, slot, cfis,
1650		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1651		break;
1652	case ATA_STANDBY_CMD:
1653	case ATA_STANDBY_IMMEDIATE:
1654	case ATA_IDLE_CMD:
1655	case ATA_IDLE_IMMEDIATE:
1656	case ATA_SLEEP:
1657		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1658		break;
1659	case ATA_ATAPI_IDENTIFY:
1660		handle_atapi_identify(p, slot, cfis);
1661		break;
1662	case ATA_PACKET_CMD:
1663		if (!p->atapi) {
1664			ahci_write_fis_d2h(p, slot, cfis,
1665			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1666		} else
1667			handle_packet_cmd(p, slot, cfis);
1668		break;
1669	default:
1670		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1671		ahci_write_fis_d2h(p, slot, cfis,
1672		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1673		break;
1674	}
1675}
1676
1677static void
1678ahci_handle_slot(struct ahci_port *p, int slot)
1679{
1680	struct ahci_cmd_hdr *hdr;
1681	struct ahci_prdt_entry *prdt;
1682	struct pci_ahci_softc *sc;
1683	uint8_t *cfis;
1684	int cfl;
1685
1686	sc = p->pr_sc;
1687	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1688	cfl = (hdr->flags & 0x1f) * 4;
1689	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1690			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1691	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1692
1693#ifdef AHCI_DEBUG
1694	DPRINTF("\ncfis:");
1695	for (i = 0; i < cfl; i++) {
1696		if (i % 10 == 0)
1697			DPRINTF("\n");
1698		DPRINTF("%02x ", cfis[i]);
1699	}
1700	DPRINTF("\n");
1701
1702	for (i = 0; i < hdr->prdtl; i++) {
1703		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1704		prdt++;
1705	}
1706#endif
1707
1708	if (cfis[0] != FIS_TYPE_REGH2D) {
1709		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1710		return;
1711	}
1712
1713	if (cfis[1] & 0x80) {
1714		ahci_handle_cmd(p, slot, cfis);
1715	} else {
1716		if (cfis[15] & (1 << 2))
1717			p->reset = 1;
1718		else if (p->reset) {
1719			p->reset = 0;
1720			ahci_port_reset(p);
1721		}
1722		p->ci &= ~(1 << slot);
1723	}
1724}
1725
1726static void
1727ahci_handle_port(struct ahci_port *p)
1728{
1729	int i;
1730
1731	if (!(p->cmd & AHCI_P_CMD_ST))
1732		return;
1733
1734	/*
1735	 * Search for any new commands to issue ignoring those that
1736	 * are already in-flight.
1737	 */
1738	for (i = 0; (i < 32) && p->ci; i++) {
1739		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1740			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1741			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1742			ahci_handle_slot(p, i);
1743		}
1744	}
1745}
1746
1747/*
1748 * blockif callback routine - this runs in the context of the blockif
1749 * i/o thread, so the mutex needs to be acquired.
1750 */
1751static void
1752ata_ioreq_cb(struct blockif_req *br, int err)
1753{
1754	struct ahci_cmd_hdr *hdr;
1755	struct ahci_ioreq *aior;
1756	struct ahci_port *p;
1757	struct pci_ahci_softc *sc;
1758	uint32_t tfd;
1759	uint8_t *cfis;
1760	int slot, ncq, dsm;
1761
1762	DPRINTF("%s %d\n", __func__, err);
1763
1764	ncq = dsm = 0;
1765	aior = br->br_param;
1766	p = aior->io_pr;
1767	cfis = aior->cfis;
1768	slot = aior->slot;
1769	sc = p->pr_sc;
1770	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1771
1772	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1773	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1774	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1775		ncq = 1;
1776	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1777	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1778	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1779		dsm = 1;
1780
1781	pthread_mutex_lock(&sc->mtx);
1782
1783	/*
1784	 * Delete the blockif request from the busy list
1785	 */
1786	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1787
1788	/*
1789	 * Move the blockif request back to the free list
1790	 */
1791	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1792
1793	if (!err)
1794		hdr->prdbc = aior->done;
1795
1796	if (!err && aior->more) {
1797		if (dsm)
1798			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1799		else
1800			ahci_handle_rw(p, slot, cfis, aior->done);
1801		goto out;
1802	}
1803
1804	if (!err)
1805		tfd = ATA_S_READY | ATA_S_DSC;
1806	else
1807		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1808	if (ncq)
1809		ahci_write_fis_sdb(p, slot, cfis, tfd);
1810	else
1811		ahci_write_fis_d2h(p, slot, cfis, tfd);
1812
1813	/*
1814	 * This command is now complete.
1815	 */
1816	p->pending &= ~(1 << slot);
1817
1818	ahci_check_stopped(p);
1819out:
1820	pthread_mutex_unlock(&sc->mtx);
1821	DPRINTF("%s exit\n", __func__);
1822}
1823
1824static void
1825atapi_ioreq_cb(struct blockif_req *br, int err)
1826{
1827	struct ahci_cmd_hdr *hdr;
1828	struct ahci_ioreq *aior;
1829	struct ahci_port *p;
1830	struct pci_ahci_softc *sc;
1831	uint8_t *cfis;
1832	uint32_t tfd;
1833	int slot;
1834
1835	DPRINTF("%s %d\n", __func__, err);
1836
1837	aior = br->br_param;
1838	p = aior->io_pr;
1839	cfis = aior->cfis;
1840	slot = aior->slot;
1841	sc = p->pr_sc;
1842	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1843
1844	pthread_mutex_lock(&sc->mtx);
1845
1846	/*
1847	 * Delete the blockif request from the busy list
1848	 */
1849	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1850
1851	/*
1852	 * Move the blockif request back to the free list
1853	 */
1854	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1855
1856	if (!err)
1857		hdr->prdbc = aior->done;
1858
1859	if (!err && aior->more) {
1860		atapi_read(p, slot, cfis, aior->done);
1861		goto out;
1862	}
1863
1864	if (!err) {
1865		tfd = ATA_S_READY | ATA_S_DSC;
1866	} else {
1867		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1868		p->asc = 0x21;
1869		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1870	}
1871	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1872	ahci_write_fis_d2h(p, slot, cfis, tfd);
1873
1874	/*
1875	 * This command is now complete.
1876	 */
1877	p->pending &= ~(1 << slot);
1878
1879	ahci_check_stopped(p);
1880out:
1881	pthread_mutex_unlock(&sc->mtx);
1882	DPRINTF("%s exit\n", __func__);
1883}
1884
1885static void
1886pci_ahci_ioreq_init(struct ahci_port *pr)
1887{
1888	struct ahci_ioreq *vr;
1889	int i;
1890
1891	pr->ioqsz = blockif_queuesz(pr->bctx);
1892	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1893	STAILQ_INIT(&pr->iofhd);
1894
1895	/*
1896	 * Add all i/o request entries to the free queue
1897	 */
1898	for (i = 0; i < pr->ioqsz; i++) {
1899		vr = &pr->ioreq[i];
1900		vr->io_pr = pr;
1901		if (!pr->atapi)
1902			vr->io_req.br_callback = ata_ioreq_cb;
1903		else
1904			vr->io_req.br_callback = atapi_ioreq_cb;
1905		vr->io_req.br_param = vr;
1906		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1907	}
1908
1909	TAILQ_INIT(&pr->iobhd);
1910}
1911
1912static void
1913pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1914{
1915	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1916	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1917	struct ahci_port *p = &sc->port[port];
1918
1919	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1920		port, offset, value);
1921
1922	switch (offset) {
1923	case AHCI_P_CLB:
1924		p->clb = value;
1925		break;
1926	case AHCI_P_CLBU:
1927		p->clbu = value;
1928		break;
1929	case AHCI_P_FB:
1930		p->fb = value;
1931		break;
1932	case AHCI_P_FBU:
1933		p->fbu = value;
1934		break;
1935	case AHCI_P_IS:
1936		p->is &= ~value;
1937		break;
1938	case AHCI_P_IE:
1939		p->ie = value & 0xFDC000FF;
1940		ahci_generate_intr(sc);
1941		break;
1942	case AHCI_P_CMD:
1943	{
1944		p->cmd = value;
1945
1946		if (!(value & AHCI_P_CMD_ST)) {
1947			ahci_port_stop(p);
1948		} else {
1949			uint64_t clb;
1950
1951			p->cmd |= AHCI_P_CMD_CR;
1952			clb = (uint64_t)p->clbu << 32 | p->clb;
1953			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1954					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1955		}
1956
1957		if (value & AHCI_P_CMD_FRE) {
1958			uint64_t fb;
1959
1960			p->cmd |= AHCI_P_CMD_FR;
1961			fb = (uint64_t)p->fbu << 32 | p->fb;
1962			/* we don't support FBSCP, so rfis size is 256Bytes */
1963			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1964		} else {
1965			p->cmd &= ~AHCI_P_CMD_FR;
1966		}
1967
1968		if (value & AHCI_P_CMD_CLO) {
1969			p->tfd = 0;
1970			p->cmd &= ~AHCI_P_CMD_CLO;
1971		}
1972
1973		ahci_handle_port(p);
1974		break;
1975	}
1976	case AHCI_P_TFD:
1977	case AHCI_P_SIG:
1978	case AHCI_P_SSTS:
1979		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1980		break;
1981	case AHCI_P_SCTL:
1982		p->sctl = value;
1983		if (!(p->cmd & AHCI_P_CMD_ST)) {
1984			if (value & ATA_SC_DET_RESET)
1985				ahci_port_reset(p);
1986		}
1987		break;
1988	case AHCI_P_SERR:
1989		p->serr &= ~value;
1990		break;
1991	case AHCI_P_SACT:
1992		p->sact |= value;
1993		break;
1994	case AHCI_P_CI:
1995		p->ci |= value;
1996		ahci_handle_port(p);
1997		break;
1998	case AHCI_P_SNTF:
1999	case AHCI_P_FBS:
2000	default:
2001		break;
2002	}
2003}
2004
2005static void
2006pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2007{
2008	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2009		offset, value);
2010
2011	switch (offset) {
2012	case AHCI_CAP:
2013	case AHCI_PI:
2014	case AHCI_VS:
2015	case AHCI_CAP2:
2016		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2017		break;
2018	case AHCI_GHC:
2019		if (value & AHCI_GHC_HR)
2020			ahci_reset(sc);
2021		else if (value & AHCI_GHC_IE) {
2022			sc->ghc |= AHCI_GHC_IE;
2023			ahci_generate_intr(sc);
2024		}
2025		break;
2026	case AHCI_IS:
2027		sc->is &= ~value;
2028		ahci_generate_intr(sc);
2029		break;
2030	default:
2031		break;
2032	}
2033}
2034
2035static void
2036pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2037		int baridx, uint64_t offset, int size, uint64_t value)
2038{
2039	struct pci_ahci_softc *sc = pi->pi_arg;
2040
2041	assert(baridx == 5);
2042	assert(size == 4);
2043
2044	pthread_mutex_lock(&sc->mtx);
2045
2046	if (offset < AHCI_OFFSET)
2047		pci_ahci_host_write(sc, offset, value);
2048	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2049		pci_ahci_port_write(sc, offset, value);
2050	else
2051		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2052
2053	pthread_mutex_unlock(&sc->mtx);
2054}
2055
2056static uint64_t
2057pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2058{
2059	uint32_t value;
2060
2061	switch (offset) {
2062	case AHCI_CAP:
2063	case AHCI_GHC:
2064	case AHCI_IS:
2065	case AHCI_PI:
2066	case AHCI_VS:
2067	case AHCI_CCCC:
2068	case AHCI_CCCP:
2069	case AHCI_EM_LOC:
2070	case AHCI_EM_CTL:
2071	case AHCI_CAP2:
2072	{
2073		uint32_t *p = &sc->cap;
2074		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2075		value = *p;
2076		break;
2077	}
2078	default:
2079		value = 0;
2080		break;
2081	}
2082	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2083		offset, value);
2084
2085	return (value);
2086}
2087
2088static uint64_t
2089pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2090{
2091	uint32_t value;
2092	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2093	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2094
2095	switch (offset) {
2096	case AHCI_P_CLB:
2097	case AHCI_P_CLBU:
2098	case AHCI_P_FB:
2099	case AHCI_P_FBU:
2100	case AHCI_P_IS:
2101	case AHCI_P_IE:
2102	case AHCI_P_CMD:
2103	case AHCI_P_TFD:
2104	case AHCI_P_SIG:
2105	case AHCI_P_SSTS:
2106	case AHCI_P_SCTL:
2107	case AHCI_P_SERR:
2108	case AHCI_P_SACT:
2109	case AHCI_P_CI:
2110	case AHCI_P_SNTF:
2111	case AHCI_P_FBS:
2112	{
2113		uint32_t *p= &sc->port[port].clb;
2114		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2115		value = *p;
2116		break;
2117	}
2118	default:
2119		value = 0;
2120		break;
2121	}
2122
2123	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2124		port, offset, value);
2125
2126	return value;
2127}
2128
2129static uint64_t
2130pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2131    uint64_t offset, int size)
2132{
2133	struct pci_ahci_softc *sc = pi->pi_arg;
2134	uint32_t value;
2135
2136	assert(baridx == 5);
2137	assert(size == 4);
2138
2139	pthread_mutex_lock(&sc->mtx);
2140
2141	if (offset < AHCI_OFFSET)
2142		value = pci_ahci_host_read(sc, offset);
2143	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2144		value = pci_ahci_port_read(sc, offset);
2145	else {
2146		value = 0;
2147		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2148	}
2149
2150	pthread_mutex_unlock(&sc->mtx);
2151
2152	return (value);
2153}
2154
2155static int
2156pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2157{
2158	char bident[sizeof("XX:X:X")];
2159	struct blockif_ctxt *bctxt;
2160	struct pci_ahci_softc *sc;
2161	int ret, slots;
2162	MD5_CTX mdctx;
2163	u_char digest[16];
2164
2165	ret = 0;
2166
2167	if (opts == NULL) {
2168		fprintf(stderr, "pci_ahci: backing device required\n");
2169		return (1);
2170	}
2171
2172#ifdef AHCI_DEBUG
2173	dbg = fopen("/tmp/log", "w+");
2174#endif
2175
2176	sc = calloc(1, sizeof(struct pci_ahci_softc));
2177	pi->pi_arg = sc;
2178	sc->asc_pi = pi;
2179	sc->ports = MAX_PORTS;
2180
2181	/*
2182	 * Only use port 0 for a backing device. All other ports will be
2183	 * marked as unused
2184	 */
2185	sc->port[0].atapi = atapi;
2186
2187	/*
2188	 * Attempt to open the backing image. Use the PCI
2189	 * slot/func for the identifier string.
2190	 */
2191	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2192	bctxt = blockif_open(opts, bident);
2193	if (bctxt == NULL) {
2194		ret = 1;
2195		goto open_fail;
2196	}
2197	sc->port[0].bctx = bctxt;
2198	sc->port[0].pr_sc = sc;
2199
2200	/*
2201	 * Create an identifier for the backing file. Use parts of the
2202	 * md5 sum of the filename
2203	 */
2204	MD5Init(&mdctx);
2205	MD5Update(&mdctx, opts, strlen(opts));
2206	MD5Final(digest, &mdctx);
2207	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2208	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2209
2210	/*
2211	 * Allocate blockif request structures and add them
2212	 * to the free list
2213	 */
2214	pci_ahci_ioreq_init(&sc->port[0]);
2215
2216	pthread_mutex_init(&sc->mtx, NULL);
2217
2218	/* Intel ICH8 AHCI */
2219	slots = sc->port[0].ioqsz;
2220	if (slots > 32)
2221		slots = 32;
2222	--slots;
2223	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2224	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2225	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2226	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2227	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2228
2229	/* Only port 0 implemented */
2230	sc->pi = 1;
2231	sc->vs = 0x10300;
2232	sc->cap2 = AHCI_CAP2_APST;
2233	ahci_reset(sc);
2234
2235	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2236	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2237	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2238	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2239	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2240	pci_emul_add_msicap(pi, 1);
2241	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2242	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2243
2244	pci_lintr_request(pi);
2245
2246open_fail:
2247	if (ret) {
2248		blockif_close(sc->port[0].bctx);
2249		free(sc);
2250	}
2251
2252	return (ret);
2253}
2254
/*
 * pe_init hook of the "ahci-hd" emulation: common initialisation with the
 * ATAPI flag off.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2261
/*
 * pe_init hook of the "ahci-cd" emulation: common initialisation with the
 * ATAPI flag on.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2268
2269/*
2270 * Use separate emulation names to distinguish drive and atapi devices
2271 */
2272struct pci_devemu pci_de_ahci_hd = {
2273	.pe_emu =	"ahci-hd",
2274	.pe_init =	pci_ahci_hd_init,
2275	.pe_barwrite =	pci_ahci_write,
2276	.pe_barread =	pci_ahci_read
2277};
2278PCI_EMUL_SET(pci_de_ahci_hd);
2279
2280struct pci_devemu pci_de_ahci_cd = {
2281	.pe_emu =	"ahci-cd",
2282	.pe_init =	pci_ahci_atapi_init,
2283	.pe_barwrite =	pci_ahci_write,
2284	.pe_barread =	pci_ahci_read
2285};
2286PCI_EMUL_SET(pci_de_ahci_cd);
2287