pci_ahci.c revision 304421
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 304421 2016-08-18 11:45:46Z mav $
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 304421 2016-08-18 11:45:46Z mav $");
32
33#include <sys/param.h>
34#include <sys/linker_set.h>
35#include <sys/stat.h>
36#include <sys/uio.h>
37#include <sys/ioctl.h>
38#include <sys/disk.h>
39#include <sys/ata.h>
40#include <sys/endian.h>
41
42#include <errno.h>
43#include <fcntl.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <stdint.h>
47#include <string.h>
48#include <strings.h>
49#include <unistd.h>
50#include <assert.h>
51#include <pthread.h>
52#include <pthread_np.h>
53#include <inttypes.h>
54#include <md5.h>
55
56#include "bhyverun.h"
57#include "pci_emul.h"
58#include "ahci.h"
59#include "block_if.h"
60
/* Port-count limits; MAX_PORTS sizes the per-controller port[] array. */
61#define	DEF_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
62#define	MAX_PORTS	32	/* AHCI supports 32 ports */
63
/* Values stored in a port's 'sig' field by ahci_port_reset(). */
64#define	PxSIG_ATA	0x00000101 /* ATA drive */
65#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
66
/*
 * FIS type codes (the value placed in byte 0 of a FIS).
 * ahci_write_fis() only accepts REGD2H, SETDEVBITS and PIOSETUP; any
 * other type is reported as unsupported and dropped.
 */
67enum sata_fis_type {
68	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
69	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
70	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
71	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
72	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
73	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
74	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
75	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
76};
77
/*
 * Constants for the ATAPI (SCSI packet) and ATA emulation below.
 * The packet command block is read from offset 0x40 of the command
 * table by the atapi_*() handlers.
 */
78/*
79 * SCSI opcodes
80 */
81#define	TEST_UNIT_READY		0x00
82#define	REQUEST_SENSE		0x03
83#define	INQUIRY			0x12
84#define	START_STOP_UNIT		0x1B
85#define	PREVENT_ALLOW		0x1E
86#define	READ_CAPACITY		0x25
87#define	READ_10			0x28
88#define	POSITION_TO_ELEMENT	0x2B
89#define	READ_TOC		0x43
90#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
91#define	MODE_SENSE_10		0x5A
92#define	REPORT_LUNS		0xA0
93#define	READ_12			0xA8
94#define	READ_CD			0xBE
95
96/*
97 * SCSI mode page codes
98 */
99#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
100#define	MODEPAGE_CD_CAPABILITIES	0x2A
101
/*
 * NOTE(review): the names below look like SET FEATURES subcommand and
 * SATA feature codes rather than command opcodes -- confirm against
 * their users, which are not visible in this part of the file.
 */
102/*
103 * ATA commands
104 */
105#define	ATA_SF_ENAB_SATA_SF		0x10
106#define		ATA_SATA_SF_AN		0x05
107#define	ATA_SF_DIS_SATA_SF		0x90
108
/*
 * DPRINTF() writes to the 'dbg' stream and flushes it when AHCI_DEBUG is
 * defined; otherwise it expands to nothing, so its arguments are not
 * evaluated.  WPRINTF() always prints to stdout.
 */
109/*
110 * Debug printf
111 */
112#ifdef AHCI_DEBUG
113static FILE *dbg;
114#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
115#else
116#define DPRINTF(format, arg...)
117#endif
118#define WPRINTF(format, arg...) printf(format, ##arg)
119
/*
 * Bookkeeping for one block-backend request issued on behalf of a
 * command slot.  An entry lives either on its port's free list or on
 * the port's busy list while the request is outstanding.
 */
120struct ahci_ioreq {
121	struct blockif_req io_req;	/* request passed to blockif_*() */
122	struct ahci_port *io_pr;	/* presumably the owning port -- not set in this part of the file */
123	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* linkage on ahci_port.iofhd (free) */
124	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* linkage on ahci_port.iobhd (busy) */
125	uint8_t *cfis;			/* command FIS that started the request */
126	uint32_t len;			/* total byte length of the command */
127	uint32_t done;			/* bytes already issued/consumed */
128	int slot;			/* command slot number */
129	int more;			/* non-zero if another pass is needed */
130};
131
/*
 * State of one emulated AHCI port: the attached backend, device
 * emulation state, the port register values and the pool of I/O
 * request descriptors.
 */
132struct ahci_port {
133	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
134	struct pci_ahci_softc *pr_sc;	/* owning controller */
135	uint8_t *cmd_lst;		/* command list, indexed by slot * AHCI_CL_SIZE */
136	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
137	char ident[20 + 1];		/* string reported in IDENTIFY words 10-19 */
138	int port;			/* port index within the controller */
139	int atapi;			/* non-zero: ATAPI device, zero: ATA disk */
140	int reset;
141	int waitforclear;		/* set when an error-status FIS was posted */
142	int mult_sectors;		/* reported in IDENTIFY word 59 */
143	uint8_t xfermode;		/* current transfer mode (ATA_UDMA6 after reset) */
144	uint8_t err_cfis[20];		/* saved error record returned by read-log */
145	uint8_t sense_key;		/* ATAPI sense key of the last error */
146	uint8_t asc;			/* ATAPI additional sense code */
147	u_int ccs;
148	uint32_t pending;		/* bitmask of slots with in-flight requests */
149
	/*
	 * Port register values.  NOTE(review): the field order appears to
	 * mirror the AHCI port register block -- confirm against the
	 * register read/write code, which is not visible here.
	 */
150	uint32_t clb;
151	uint32_t clbu;
152	uint32_t fb;
153	uint32_t fbu;
154	uint32_t is;
155	uint32_t ie;
156	uint32_t cmd;
157	uint32_t unused0;
158	uint32_t tfd;
159	uint32_t sig;
160	uint32_t ssts;
161	uint32_t sctl;
162	uint32_t serr;
163	uint32_t sact;
164	uint32_t ci;
165	uint32_t sntf;
166	uint32_t fbs;
167
168	/*
169	 * i/o request info
170	 */
171	struct ahci_ioreq *ioreq;
172	int ioqsz;
173	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free descriptors */
174	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* descriptors with I/O outstanding */
175};
176
/*
 * One entry of a port's command list; located at
 * cmd_lst + slot * AHCI_CL_SIZE.  Field order and sizes define the
 * in-memory layout and must not be changed.
 */
177struct ahci_cmd_hdr {
178	uint16_t flags;
179	uint16_t prdtl;		/* number of PRDT entries in the command table */
180	uint32_t prdbc;		/* byte count transferred, filled in by write_prdt() */
181	uint64_t ctba;
182	uint32_t reserved[4];
183};
184
/*
 * One physical region descriptor; the table starts 0x80 bytes into the
 * command table (cfis + 0x80).  Field order and sizes define the
 * in-memory layout and must not be changed.
 */
185struct ahci_prdt_entry {
186	uint64_t dba;		/* guest-physical address of the data buffer */
187	uint32_t reserved;
188#define	DBCMASK		0x3fffff
189	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer length in bytes */
190};
191
/*
 * Per-controller state: the PCI device instance, the global HBA
 * register values and the array of ports.
 */
192struct pci_ahci_softc {
193	struct pci_devinst *asc_pi;	/* PCI device used for MSI/INTx delivery */
194	pthread_mutex_t	mtx;		/* ahci_port_stop() asserts this is held */
195	int ports;			/* number of ports in use (<= MAX_PORTS) */
196	uint32_t cap;
197	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
198	uint32_t is;			/* global interrupt status, one bit per port */
199	uint32_t pi;
200	uint32_t vs;
201	uint32_t ccc_ctl;
202	uint32_t ccc_pts;
203	uint32_t em_loc;
204	uint32_t em_ctl;
205	uint32_t cap2;
206	uint32_t bohc;
207	uint32_t lintr;			/* non-zero while the legacy interrupt is asserted */
208	struct ahci_port port[MAX_PORTS];
209};
/* VM context passed to paddr_guest2host() for guest memory access. */
210#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
211
/* Forward declaration: ahci_handle_dsm_trim() calls this before its definition. */
212static void ahci_handle_port(struct ahci_port *p);
213
/*
 * Encode 'lba' as three bytes in buf[0..2]: after adding a fixed offset
 * of 150, the value is split into units of 75 (buf[2] is the remainder),
 * and the quotient is split again into units of 60 (buf[1] remainder,
 * buf[0] quotient).
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames = lba + 150;
	int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
221
222/*
223 * Generate HBA interrupts on global IS register write.
224 */
225static void
226ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
227{
228	struct pci_devinst *pi = sc->asc_pi;
229	struct ahci_port *p;
230	int i, nmsg;
231	uint32_t mmask;
232
233	/* Update global IS from PxIS/PxIE. */
234	for (i = 0; i < sc->ports; i++) {
235		p = &sc->port[i];
236		if (p->is & p->ie)
237			sc->is |= (1 << i);
238	}
239	DPRINTF("%s(%08x) %08x\n", __func__, mask, sc->is);
240
241	/* If there is nothing enabled -- clear legacy interrupt and exit. */
242	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
243		if (sc->lintr) {
244			pci_lintr_deassert(pi);
245			sc->lintr = 0;
246		}
247		return;
248	}
249
250	/* If there is anything and no MSI -- assert legacy interrupt. */
251	nmsg = pci_msi_maxmsgnum(pi);
252	if (nmsg == 0) {
253		if (!sc->lintr) {
254			sc->lintr = 1;
255			pci_lintr_assert(pi);
256		}
257		return;
258	}
259
260	/* Assert respective MSIs for ports that were touched. */
261	for (i = 0; i < nmsg; i++) {
262		if (sc->ports <= nmsg || i < nmsg - 1)
263			mmask = 1 << i;
264		else
265			mmask = 0xffffffff << i;
266		if (sc->is & mask && mmask & mask)
267			pci_generate_msi(pi, i);
268	}
269}
270
271/*
272 * Generate HBA interrupt on specific port event.
273 */
274static void
275ahci_port_intr(struct ahci_port *p)
276{
277	struct pci_ahci_softc *sc = p->pr_sc;
278	struct pci_devinst *pi = sc->asc_pi;
279	int nmsg;
280
281	DPRINTF("%s(%d) %08x/%08x %08x\n", __func__,
282	    p->port, p->is, p->ie, sc->is);
283
284	/* If there is nothing enabled -- we are done. */
285	if ((p->is & p->ie) == 0)
286		return;
287
288	/* In case of non-shared MSI always generate interrupt. */
289	nmsg = pci_msi_maxmsgnum(pi);
290	if (sc->ports <= nmsg || p->port < nmsg - 1) {
291		sc->is |= (1 << p->port);
292		if ((sc->ghc & AHCI_GHC_IE) == 0)
293			return;
294		pci_generate_msi(pi, p->port);
295		return;
296	}
297
298	/* If IS for this port is already set -- do nothing. */
299	if (sc->is & (1 << p->port))
300		return;
301
302	sc->is |= (1 << p->port);
303
304	/* If interrupts are enabled -- generate one. */
305	if ((sc->ghc & AHCI_GHC_IE) == 0)
306		return;
307	if (nmsg > 0) {
308		pci_generate_msi(pi, nmsg - 1);
309	} else if (!sc->lintr) {
310		sc->lintr = 1;
311		pci_lintr_assert(pi);
312	}
313}
314
315static void
316ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
317{
318	int offset, len, irq;
319
320	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
321		return;
322
323	switch (ft) {
324	case FIS_TYPE_REGD2H:
325		offset = 0x40;
326		len = 20;
327		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
328		break;
329	case FIS_TYPE_SETDEVBITS:
330		offset = 0x58;
331		len = 8;
332		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
333		break;
334	case FIS_TYPE_PIOSETUP:
335		offset = 0x20;
336		len = 20;
337		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
338		break;
339	default:
340		WPRINTF("unsupported fis type %d\n", ft);
341		return;
342	}
343	if (fis[2] & ATA_S_ERROR) {
344		p->waitforclear = 1;
345		irq |= AHCI_P_IX_TFE;
346	}
347	memcpy(p->rfis + offset, fis, len);
348	if (irq) {
349		if (~p->is & irq) {
350			p->is |= irq;
351			ahci_port_intr(p);
352		}
353	}
354}
355
356static void
357ahci_write_fis_piosetup(struct ahci_port *p)
358{
359	uint8_t fis[20];
360
361	memset(fis, 0, sizeof(fis));
362	fis[0] = FIS_TYPE_PIOSETUP;
363	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
364}
365
366static void
367ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
368{
369	uint8_t fis[8];
370	uint8_t error;
371
372	error = (tfd >> 8) & 0xff;
373	tfd &= 0x77;
374	memset(fis, 0, sizeof(fis));
375	fis[0] = FIS_TYPE_SETDEVBITS;
376	fis[1] = (1 << 6);
377	fis[2] = tfd;
378	fis[3] = error;
379	if (fis[2] & ATA_S_ERROR) {
380		p->err_cfis[0] = slot;
381		p->err_cfis[2] = tfd;
382		p->err_cfis[3] = error;
383		memcpy(&p->err_cfis[4], cfis + 4, 16);
384	} else {
385		*(uint32_t *)(fis + 4) = (1 << slot);
386		p->sact &= ~(1 << slot);
387	}
388	p->tfd &= ~0x77;
389	p->tfd |= tfd;
390	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
391}
392
393static void
394ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
395{
396	uint8_t fis[20];
397	uint8_t error;
398
399	error = (tfd >> 8) & 0xff;
400	memset(fis, 0, sizeof(fis));
401	fis[0] = FIS_TYPE_REGD2H;
402	fis[1] = (1 << 6);
403	fis[2] = tfd & 0xff;
404	fis[3] = error;
405	fis[4] = cfis[4];
406	fis[5] = cfis[5];
407	fis[6] = cfis[6];
408	fis[7] = cfis[7];
409	fis[8] = cfis[8];
410	fis[9] = cfis[9];
411	fis[10] = cfis[10];
412	fis[11] = cfis[11];
413	fis[12] = cfis[12];
414	fis[13] = cfis[13];
415	if (fis[2] & ATA_S_ERROR) {
416		p->err_cfis[0] = 0x80;
417		p->err_cfis[2] = tfd & 0xff;
418		p->err_cfis[3] = error;
419		memcpy(&p->err_cfis[4], cfis + 4, 16);
420	} else
421		p->ci &= ~(1 << slot);
422	p->tfd = tfd;
423	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
424}
425
426static void
427ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
428{
429	uint8_t fis[20];
430
431	p->tfd = ATA_S_READY | ATA_S_DSC;
432	memset(fis, 0, sizeof(fis));
433	fis[0] = FIS_TYPE_REGD2H;
434	fis[1] = 0;			/* No interrupt */
435	fis[2] = p->tfd;		/* Status */
436	fis[3] = 0;			/* No error */
437	p->ci &= ~(1 << slot);
438	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
439}
440
441static void
442ahci_write_reset_fis_d2h(struct ahci_port *p)
443{
444	uint8_t fis[20];
445
446	memset(fis, 0, sizeof(fis));
447	fis[0] = FIS_TYPE_REGD2H;
448	fis[3] = 1;
449	fis[4] = 1;
450	if (p->atapi) {
451		fis[5] = 0x14;
452		fis[6] = 0xeb;
453	}
454	fis[12] = 1;
455	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
456}
457
458static void
459ahci_check_stopped(struct ahci_port *p)
460{
461	/*
462	 * If we are no longer processing the command list and nothing
463	 * is in-flight, clear the running bit, the current command
464	 * slot, the command issue and active bits.
465	 */
466	if (!(p->cmd & AHCI_P_CMD_ST)) {
467		if (p->pending == 0) {
468			p->ccs = 0;
469			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
470			p->ci = 0;
471			p->sact = 0;
472			p->waitforclear = 0;
473		}
474	}
475}
476
477static void
478ahci_port_stop(struct ahci_port *p)
479{
480	struct ahci_ioreq *aior;
481	uint8_t *cfis;
482	int slot;
483	int ncq;
484	int error;
485
486	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
487
488	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
489		/*
490		 * Try to cancel the outstanding blockif request.
491		 */
492		error = blockif_cancel(p->bctx, &aior->io_req);
493		if (error != 0)
494			continue;
495
496		slot = aior->slot;
497		cfis = aior->cfis;
498		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
499		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
500		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
501			ncq = 1;
502
503		if (ncq)
504			p->sact &= ~(1 << slot);
505		else
506			p->ci &= ~(1 << slot);
507
508		/*
509		 * This command is now done.
510		 */
511		p->pending &= ~(1 << slot);
512
513		/*
514		 * Delete the blockif request from the busy list
515		 */
516		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
517
518		/*
519		 * Move the blockif request back to the free list
520		 */
521		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
522	}
523
524	ahci_check_stopped(p);
525}
526
527static void
528ahci_port_reset(struct ahci_port *pr)
529{
530	pr->serr = 0;
531	pr->sact = 0;
532	pr->xfermode = ATA_UDMA6;
533	pr->mult_sectors = 128;
534
535	if (!pr->bctx) {
536		pr->ssts = ATA_SS_DET_NO_DEVICE;
537		pr->sig = 0xFFFFFFFF;
538		pr->tfd = 0x7F;
539		return;
540	}
541	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
542	if (pr->sctl & ATA_SC_SPD_MASK)
543		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
544	else
545		pr->ssts |= ATA_SS_SPD_GEN3;
546	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
547	if (!pr->atapi) {
548		pr->sig = PxSIG_ATA;
549		pr->tfd |= ATA_S_READY;
550	} else
551		pr->sig = PxSIG_ATAPI;
552	ahci_write_reset_fis_d2h(pr);
553}
554
555static void
556ahci_reset(struct pci_ahci_softc *sc)
557{
558	int i;
559
560	sc->ghc = AHCI_GHC_AE;
561	sc->is = 0;
562
563	if (sc->lintr) {
564		pci_lintr_deassert(sc->asc_pi);
565		sc->lintr = 0;
566	}
567
568	for (i = 0; i < sc->ports; i++) {
569		sc->port[i].ie = 0;
570		sc->port[i].is = 0;
571		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
572		if (sc->port[i].bctx)
573			sc->port[i].cmd |= AHCI_P_CMD_CPS;
574		sc->port[i].sctl = 0;
575		ahci_port_reset(&sc->port[i]);
576	}
577}
578
/*
 * Store 'src' into the 'len'-byte field 'dest' with the two bytes of
 * every pair swapped (position i lands at i ^ 1), padding with spaces
 * once 'src' is exhausted.  No terminator is written.  'len' is expected
 * to be even; an odd length would place the last byte at dest[len].
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
591
/*
 * Copy 'src' into the 'len'-byte field 'dest', padding with spaces once
 * 'src' is exhausted and truncating if it is longer.  No terminator is
 * written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *const end = dest + len;

	while (out < end) {
		if (*src != '\0')
			*out++ = *src++;
		else
			*out++ = ' ';
	}
}
604
605/*
606 * Build up the iovec based on the PRDT, 'done' and 'len'.
607 */
608static void
609ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
610    struct ahci_prdt_entry *prdt, uint16_t prdtl)
611{
612	struct blockif_req *breq = &aior->io_req;
613	int i, j, skip, todo, left, extra;
614	uint32_t dbcsz;
615
616	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
617	skip = aior->done;
618	left = aior->len - aior->done;
619	todo = 0;
620	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
621	    i++, prdt++) {
622		dbcsz = (prdt->dbc & DBCMASK) + 1;
623		/* Skip already done part of the PRDT */
624		if (dbcsz <= skip) {
625			skip -= dbcsz;
626			continue;
627		}
628		dbcsz -= skip;
629		if (dbcsz > left)
630			dbcsz = left;
631		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
632		    prdt->dba + skip, dbcsz);
633		breq->br_iov[j].iov_len = dbcsz;
634		todo += dbcsz;
635		left -= dbcsz;
636		skip = 0;
637		j++;
638	}
639
640	/* If we got limited by IOV length, round I/O down to sector size. */
641	if (j == BLOCKIF_IOV_MAX) {
642		extra = todo % blockif_sectsz(p->bctx);
643		todo -= extra;
644		assert(todo > 0);
645		while (extra > 0) {
646			if (breq->br_iov[j - 1].iov_len > extra) {
647				breq->br_iov[j - 1].iov_len -= extra;
648				break;
649			}
650			extra -= breq->br_iov[j - 1].iov_len;
651			j--;
652		}
653	}
654
655	breq->br_iovcnt = j;
656	breq->br_resid = todo;
657	aior->done += todo;
658	aior->more = (aior->done < aior->len && i < prdtl);
659}
660
661static void
662ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
663{
664	struct ahci_ioreq *aior;
665	struct blockif_req *breq;
666	struct ahci_prdt_entry *prdt;
667	struct ahci_cmd_hdr *hdr;
668	uint64_t lba;
669	uint32_t len;
670	int err, first, ncq, readop;
671
672	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
673	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
674	ncq = 0;
675	readop = 1;
676	first = (done == 0);
677
678	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
679	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
680	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
681	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
682		readop = 0;
683
684	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
685	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
686		lba = ((uint64_t)cfis[10] << 40) |
687			((uint64_t)cfis[9] << 32) |
688			((uint64_t)cfis[8] << 24) |
689			((uint64_t)cfis[6] << 16) |
690			((uint64_t)cfis[5] << 8) |
691			cfis[4];
692		len = cfis[11] << 8 | cfis[3];
693		if (!len)
694			len = 65536;
695		ncq = 1;
696	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
697	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
698	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
699		lba = ((uint64_t)cfis[10] << 40) |
700			((uint64_t)cfis[9] << 32) |
701			((uint64_t)cfis[8] << 24) |
702			((uint64_t)cfis[6] << 16) |
703			((uint64_t)cfis[5] << 8) |
704			cfis[4];
705		len = cfis[13] << 8 | cfis[12];
706		if (!len)
707			len = 65536;
708	} else {
709		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
710			(cfis[5] << 8) | cfis[4];
711		len = cfis[12];
712		if (!len)
713			len = 256;
714	}
715	lba *= blockif_sectsz(p->bctx);
716	len *= blockif_sectsz(p->bctx);
717
718	/* Pull request off free list */
719	aior = STAILQ_FIRST(&p->iofhd);
720	assert(aior != NULL);
721	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
722
723	aior->cfis = cfis;
724	aior->slot = slot;
725	aior->len = len;
726	aior->done = done;
727	breq = &aior->io_req;
728	breq->br_offset = lba + done;
729	ahci_build_iov(p, aior, prdt, hdr->prdtl);
730
731	/* Mark this command in-flight. */
732	p->pending |= 1 << slot;
733
734	/* Stuff request onto busy list. */
735	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
736
737	if (ncq && first)
738		ahci_write_fis_d2h_ncq(p, slot);
739
740	if (readop)
741		err = blockif_read(p->bctx, breq);
742	else
743		err = blockif_write(p->bctx, breq);
744	assert(err == 0);
745}
746
747static void
748ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
749{
750	struct ahci_ioreq *aior;
751	struct blockif_req *breq;
752	int err;
753
754	/*
755	 * Pull request off free list
756	 */
757	aior = STAILQ_FIRST(&p->iofhd);
758	assert(aior != NULL);
759	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
760	aior->cfis = cfis;
761	aior->slot = slot;
762	aior->len = 0;
763	aior->done = 0;
764	aior->more = 0;
765	breq = &aior->io_req;
766
767	/*
768	 * Mark this command in-flight.
769	 */
770	p->pending |= 1 << slot;
771
772	/*
773	 * Stuff request onto busy list
774	 */
775	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
776
777	err = blockif_flush(p->bctx, breq);
778	assert(err == 0);
779}
780
781static inline void
782read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
783		void *buf, int size)
784{
785	struct ahci_cmd_hdr *hdr;
786	struct ahci_prdt_entry *prdt;
787	void *to;
788	int i, len;
789
790	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
791	len = size;
792	to = buf;
793	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
794	for (i = 0; i < hdr->prdtl && len; i++) {
795		uint8_t *ptr;
796		uint32_t dbcsz;
797		int sublen;
798
799		dbcsz = (prdt->dbc & DBCMASK) + 1;
800		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
801		sublen = len < dbcsz ? len : dbcsz;
802		memcpy(to, ptr, sublen);
803		len -= sublen;
804		to += sublen;
805		prdt++;
806	}
807}
808
809static void
810ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
811{
812	struct ahci_ioreq *aior;
813	struct blockif_req *breq;
814	uint8_t *entry;
815	uint64_t elba;
816	uint32_t len, elen;
817	int err, first, ncq;
818	uint8_t buf[512];
819
820	first = (done == 0);
821	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
822		len = (uint16_t)cfis[13] << 8 | cfis[12];
823		len *= 512;
824		ncq = 0;
825	} else { /* ATA_SEND_FPDMA_QUEUED */
826		len = (uint16_t)cfis[11] << 8 | cfis[3];
827		len *= 512;
828		ncq = 1;
829	}
830	read_prdt(p, slot, cfis, buf, sizeof(buf));
831
832next:
833	entry = &buf[done];
834	elba = ((uint64_t)entry[5] << 40) |
835		((uint64_t)entry[4] << 32) |
836		((uint64_t)entry[3] << 24) |
837		((uint64_t)entry[2] << 16) |
838		((uint64_t)entry[1] << 8) |
839		entry[0];
840	elen = (uint16_t)entry[7] << 8 | entry[6];
841	done += 8;
842	if (elen == 0) {
843		if (done >= len) {
844			if (ncq) {
845				if (first)
846					ahci_write_fis_d2h_ncq(p, slot);
847				ahci_write_fis_sdb(p, slot, cfis,
848				    ATA_S_READY | ATA_S_DSC);
849			} else {
850				ahci_write_fis_d2h(p, slot, cfis,
851				    ATA_S_READY | ATA_S_DSC);
852			}
853			p->pending &= ~(1 << slot);
854			ahci_check_stopped(p);
855			if (!first)
856				ahci_handle_port(p);
857			return;
858		}
859		goto next;
860	}
861
862	/*
863	 * Pull request off free list
864	 */
865	aior = STAILQ_FIRST(&p->iofhd);
866	assert(aior != NULL);
867	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
868	aior->cfis = cfis;
869	aior->slot = slot;
870	aior->len = len;
871	aior->done = done;
872	aior->more = (len != done);
873
874	breq = &aior->io_req;
875	breq->br_offset = elba * blockif_sectsz(p->bctx);
876	breq->br_resid = elen * blockif_sectsz(p->bctx);
877
878	/*
879	 * Mark this command in-flight.
880	 */
881	p->pending |= 1 << slot;
882
883	/*
884	 * Stuff request onto busy list
885	 */
886	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
887
888	if (ncq && first)
889		ahci_write_fis_d2h_ncq(p, slot);
890
891	err = blockif_delete(p->bctx, breq);
892	assert(err == 0);
893}
894
895static inline void
896write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
897		void *buf, int size)
898{
899	struct ahci_cmd_hdr *hdr;
900	struct ahci_prdt_entry *prdt;
901	void *from;
902	int i, len;
903
904	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
905	len = size;
906	from = buf;
907	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
908	for (i = 0; i < hdr->prdtl && len; i++) {
909		uint8_t *ptr;
910		uint32_t dbcsz;
911		int sublen;
912
913		dbcsz = (prdt->dbc & DBCMASK) + 1;
914		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
915		sublen = len < dbcsz ? len : dbcsz;
916		memcpy(ptr, from, sublen);
917		len -= sublen;
918		from += sublen;
919		prdt++;
920	}
921	hdr->prdbc = size - len;
922}
923
/*
 * Store a checksum in the last byte of 'buf' so that the sum of all
 * 'size' bytes is zero modulo 256.  A non-positive 'size' is ignored;
 * it has no last byte to write.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int i;
	uint8_t sum = 0;

	if (size <= 0)
		return;

	for (i = 0; i < size - 1; i++)
		sum += buf[i];
	buf[size - 1] = 0x100 - sum;
}
934
935static void
936ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
937{
938	struct ahci_cmd_hdr *hdr;
939	uint8_t buf[512];
940
941	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
942	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
943	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
944		ahci_write_fis_d2h(p, slot, cfis,
945		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
946		return;
947	}
948
949	memset(buf, 0, sizeof(buf));
950	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
951	ahci_checksum(buf, sizeof(buf));
952
953	if (cfis[2] == ATA_READ_LOG_EXT)
954		ahci_write_fis_piosetup(p);
955	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
956	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
957}
958
959static void
960handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
961{
962	struct ahci_cmd_hdr *hdr;
963
964	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
965	if (p->atapi || hdr->prdtl == 0) {
966		ahci_write_fis_d2h(p, slot, cfis,
967		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
968	} else {
969		uint16_t buf[256];
970		uint64_t sectors;
971		int sectsz, psectsz, psectoff, candelete, ro;
972		uint16_t cyl;
973		uint8_t sech, heads;
974
975		ro = blockif_is_ro(p->bctx);
976		candelete = blockif_candelete(p->bctx);
977		sectsz = blockif_sectsz(p->bctx);
978		sectors = blockif_size(p->bctx) / sectsz;
979		blockif_chs(p->bctx, &cyl, &heads, &sech);
980		blockif_psectsz(p->bctx, &psectsz, &psectoff);
981		memset(buf, 0, sizeof(buf));
982		buf[0] = 0x0040;
983		buf[1] = cyl;
984		buf[3] = heads;
985		buf[6] = sech;
986		ata_string((uint8_t *)(buf+10), p->ident, 20);
987		ata_string((uint8_t *)(buf+23), "001", 8);
988		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
989		buf[47] = (0x8000 | 128);
990		buf[48] = 0;
991		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
992		buf[50] = (1 << 14);
993		buf[53] = (1 << 1 | 1 << 2);
994		if (p->mult_sectors)
995			buf[59] = (0x100 | p->mult_sectors);
996		if (sectors <= 0x0fffffff) {
997			buf[60] = sectors;
998			buf[61] = (sectors >> 16);
999		} else {
1000			buf[60] = 0xffff;
1001			buf[61] = 0x0fff;
1002		}
1003		buf[63] = 0x7;
1004		if (p->xfermode & ATA_WDMA0)
1005			buf[63] |= (1 << ((p->xfermode & 7) + 8));
1006		buf[64] = 0x3;
1007		buf[65] = 120;
1008		buf[66] = 120;
1009		buf[67] = 120;
1010		buf[68] = 120;
1011		buf[69] = 0;
1012		buf[75] = 31;
1013		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1014			   ATA_SUPPORT_NCQ);
1015		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1016			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
1017		buf[80] = 0x3f0;
1018		buf[81] = 0x28;
1019		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
1020			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1021		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1022			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1023		buf[84] = (1 << 14);
1024		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
1025			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1026		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1027			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1028		buf[87] = (1 << 14);
1029		buf[88] = 0x7f;
1030		if (p->xfermode & ATA_UDMA0)
1031			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1032		buf[100] = sectors;
1033		buf[101] = (sectors >> 16);
1034		buf[102] = (sectors >> 32);
1035		buf[103] = (sectors >> 48);
1036		if (candelete && !ro) {
1037			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1038			buf[105] = 1;
1039			buf[169] = ATA_SUPPORT_DSM_TRIM;
1040		}
1041		buf[106] = 0x4000;
1042		buf[209] = 0x4000;
1043		if (psectsz > sectsz) {
1044			buf[106] |= 0x2000;
1045			buf[106] |= ffsl(psectsz / sectsz) - 1;
1046			buf[209] |= (psectoff / sectsz);
1047		}
1048		if (sectsz > 512) {
1049			buf[106] |= 0x1000;
1050			buf[117] = sectsz / 2;
1051			buf[118] = ((sectsz / 2) >> 16);
1052		}
1053		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1054		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1055		buf[222] = 0x1020;
1056		buf[255] = 0x00a5;
1057		ahci_checksum((uint8_t *)buf, sizeof(buf));
1058		ahci_write_fis_piosetup(p);
1059		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1060		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1061	}
1062}
1063
1064static void
1065handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1066{
1067	if (!p->atapi) {
1068		ahci_write_fis_d2h(p, slot, cfis,
1069		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1070	} else {
1071		uint16_t buf[256];
1072
1073		memset(buf, 0, sizeof(buf));
1074		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
1075		ata_string((uint8_t *)(buf+10), p->ident, 20);
1076		ata_string((uint8_t *)(buf+23), "001", 8);
1077		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
1078		buf[49] = (1 << 9 | 1 << 8);
1079		buf[50] = (1 << 14 | 1);
1080		buf[53] = (1 << 2 | 1 << 1);
1081		buf[62] = 0x3f;
1082		buf[63] = 7;
1083		if (p->xfermode & ATA_WDMA0)
1084			buf[63] |= (1 << ((p->xfermode & 7) + 8));
1085		buf[64] = 3;
1086		buf[65] = 120;
1087		buf[66] = 120;
1088		buf[67] = 120;
1089		buf[68] = 120;
1090		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1091		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1092		buf[78] = (1 << 5);
1093		buf[80] = 0x3f0;
1094		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1095			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1096		buf[83] = (1 << 14);
1097		buf[84] = (1 << 14);
1098		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1099			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1100		buf[87] = (1 << 14);
1101		buf[88] = 0x7f;
1102		if (p->xfermode & ATA_UDMA0)
1103			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1104		buf[222] = 0x1020;
1105		buf[255] = 0x00a5;
1106		ahci_checksum((uint8_t *)buf, sizeof(buf));
1107		ahci_write_fis_piosetup(p);
1108		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1109		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1110	}
1111}
1112
1113static void
1114atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1115{
1116	uint8_t buf[36];
1117	uint8_t *acmd;
1118	int len;
1119	uint32_t tfd;
1120
1121	acmd = cfis + 0x40;
1122
1123	if (acmd[1] & 1) {		/* VPD */
1124		if (acmd[2] == 0) {	/* Supported VPD pages */
1125			buf[0] = 0x05;
1126			buf[1] = 0;
1127			buf[2] = 0;
1128			buf[3] = 1;
1129			buf[4] = 0;
1130			len = 4 + buf[3];
1131		} else {
1132			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1133			p->asc = 0x24;
1134			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1135			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1136			ahci_write_fis_d2h(p, slot, cfis, tfd);
1137			return;
1138		}
1139	} else {
1140		buf[0] = 0x05;
1141		buf[1] = 0x80;
1142		buf[2] = 0x00;
1143		buf[3] = 0x21;
1144		buf[4] = 31;
1145		buf[5] = 0;
1146		buf[6] = 0;
1147		buf[7] = 0;
1148		atapi_string(buf + 8, "BHYVE", 8);
1149		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1150		atapi_string(buf + 32, "001", 4);
1151		len = sizeof(buf);
1152	}
1153
1154	if (len > acmd[4])
1155		len = acmd[4];
1156	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1157	write_prdt(p, slot, cfis, buf, len);
1158	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1159}
1160
1161static void
1162atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1163{
1164	uint8_t buf[8];
1165	uint64_t sectors;
1166
1167	sectors = blockif_size(p->bctx) / 2048;
1168	be32enc(buf, sectors - 1);
1169	be32enc(buf + 4, 2048);
1170	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1171	write_prdt(p, slot, cfis, buf, sizeof(buf));
1172	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1173}
1174
1175static void
1176atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1177{
1178	uint8_t *acmd;
1179	uint8_t format;
1180	int len;
1181
1182	acmd = cfis + 0x40;
1183
1184	len = be16dec(acmd + 7);
1185	format = acmd[9] >> 6;
1186	switch (format) {
1187	case 0:
1188	{
1189		int msf, size;
1190		uint64_t sectors;
1191		uint8_t start_track, buf[20], *bp;
1192
1193		msf = (acmd[1] >> 1) & 1;
1194		start_track = acmd[6];
1195		if (start_track > 1 && start_track != 0xaa) {
1196			uint32_t tfd;
1197			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1198			p->asc = 0x24;
1199			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1200			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1201			ahci_write_fis_d2h(p, slot, cfis, tfd);
1202			return;
1203		}
1204		bp = buf + 2;
1205		*bp++ = 1;
1206		*bp++ = 1;
1207		if (start_track <= 1) {
1208			*bp++ = 0;
1209			*bp++ = 0x14;
1210			*bp++ = 1;
1211			*bp++ = 0;
1212			if (msf) {
1213				*bp++ = 0;
1214				lba_to_msf(bp, 0);
1215				bp += 3;
1216			} else {
1217				*bp++ = 0;
1218				*bp++ = 0;
1219				*bp++ = 0;
1220				*bp++ = 0;
1221			}
1222		}
1223		*bp++ = 0;
1224		*bp++ = 0x14;
1225		*bp++ = 0xaa;
1226		*bp++ = 0;
1227		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1228		sectors >>= 2;
1229		if (msf) {
1230			*bp++ = 0;
1231			lba_to_msf(bp, sectors);
1232			bp += 3;
1233		} else {
1234			be32enc(bp, sectors);
1235			bp += 4;
1236		}
1237		size = bp - buf;
1238		be16enc(buf, size - 2);
1239		if (len > size)
1240			len = size;
1241		write_prdt(p, slot, cfis, buf, len);
1242		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1243		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1244		break;
1245	}
1246	case 1:
1247	{
1248		uint8_t buf[12];
1249
1250		memset(buf, 0, sizeof(buf));
1251		buf[1] = 0xa;
1252		buf[2] = 0x1;
1253		buf[3] = 0x1;
1254		if (len > sizeof(buf))
1255			len = sizeof(buf);
1256		write_prdt(p, slot, cfis, buf, len);
1257		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1258		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1259		break;
1260	}
1261	case 2:
1262	{
1263		int msf, size;
1264		uint64_t sectors;
1265		uint8_t start_track, *bp, buf[50];
1266
1267		msf = (acmd[1] >> 1) & 1;
1268		start_track = acmd[6];
1269		bp = buf + 2;
1270		*bp++ = 1;
1271		*bp++ = 1;
1272
1273		*bp++ = 1;
1274		*bp++ = 0x14;
1275		*bp++ = 0;
1276		*bp++ = 0xa0;
1277		*bp++ = 0;
1278		*bp++ = 0;
1279		*bp++ = 0;
1280		*bp++ = 0;
1281		*bp++ = 1;
1282		*bp++ = 0;
1283		*bp++ = 0;
1284
1285		*bp++ = 1;
1286		*bp++ = 0x14;
1287		*bp++ = 0;
1288		*bp++ = 0xa1;
1289		*bp++ = 0;
1290		*bp++ = 0;
1291		*bp++ = 0;
1292		*bp++ = 0;
1293		*bp++ = 1;
1294		*bp++ = 0;
1295		*bp++ = 0;
1296
1297		*bp++ = 1;
1298		*bp++ = 0x14;
1299		*bp++ = 0;
1300		*bp++ = 0xa2;
1301		*bp++ = 0;
1302		*bp++ = 0;
1303		*bp++ = 0;
1304		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1305		sectors >>= 2;
1306		if (msf) {
1307			*bp++ = 0;
1308			lba_to_msf(bp, sectors);
1309			bp += 3;
1310		} else {
1311			be32enc(bp, sectors);
1312			bp += 4;
1313		}
1314
1315		*bp++ = 1;
1316		*bp++ = 0x14;
1317		*bp++ = 0;
1318		*bp++ = 1;
1319		*bp++ = 0;
1320		*bp++ = 0;
1321		*bp++ = 0;
1322		if (msf) {
1323			*bp++ = 0;
1324			lba_to_msf(bp, 0);
1325			bp += 3;
1326		} else {
1327			*bp++ = 0;
1328			*bp++ = 0;
1329			*bp++ = 0;
1330			*bp++ = 0;
1331		}
1332
1333		size = bp - buf;
1334		be16enc(buf, size - 2);
1335		if (len > size)
1336			len = size;
1337		write_prdt(p, slot, cfis, buf, len);
1338		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1339		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1340		break;
1341	}
1342	default:
1343	{
1344		uint32_t tfd;
1345
1346		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1347		p->asc = 0x24;
1348		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1349		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1350		ahci_write_fis_d2h(p, slot, cfis, tfd);
1351		break;
1352	}
1353	}
1354}
1355
1356static void
1357atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1358{
1359	uint8_t buf[16];
1360
1361	memset(buf, 0, sizeof(buf));
1362	buf[3] = 8;
1363
1364	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1365	write_prdt(p, slot, cfis, buf, sizeof(buf));
1366	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1367}
1368
1369static void
1370atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1371{
1372	struct ahci_ioreq *aior;
1373	struct ahci_cmd_hdr *hdr;
1374	struct ahci_prdt_entry *prdt;
1375	struct blockif_req *breq;
1376	struct pci_ahci_softc *sc;
1377	uint8_t *acmd;
1378	uint64_t lba;
1379	uint32_t len;
1380	int err;
1381
1382	sc = p->pr_sc;
1383	acmd = cfis + 0x40;
1384	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1385	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1386
1387	lba = be32dec(acmd + 2);
1388	if (acmd[0] == READ_10)
1389		len = be16dec(acmd + 7);
1390	else
1391		len = be32dec(acmd + 6);
1392	if (len == 0) {
1393		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1394		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1395	}
1396	lba *= 2048;
1397	len *= 2048;
1398
1399	/*
1400	 * Pull request off free list
1401	 */
1402	aior = STAILQ_FIRST(&p->iofhd);
1403	assert(aior != NULL);
1404	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1405	aior->cfis = cfis;
1406	aior->slot = slot;
1407	aior->len = len;
1408	aior->done = done;
1409	breq = &aior->io_req;
1410	breq->br_offset = lba + done;
1411	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1412
1413	/* Mark this command in-flight. */
1414	p->pending |= 1 << slot;
1415
1416	/* Stuff request onto busy list. */
1417	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1418
1419	err = blockif_read(p->bctx, breq);
1420	assert(err == 0);
1421}
1422
1423static void
1424atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1425{
1426	uint8_t buf[64];
1427	uint8_t *acmd;
1428	int len;
1429
1430	acmd = cfis + 0x40;
1431	len = acmd[4];
1432	if (len > sizeof(buf))
1433		len = sizeof(buf);
1434	memset(buf, 0, len);
1435	buf[0] = 0x70 | (1 << 7);
1436	buf[2] = p->sense_key;
1437	buf[7] = 10;
1438	buf[12] = p->asc;
1439	write_prdt(p, slot, cfis, buf, len);
1440	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1441	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1442}
1443
1444static void
1445atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1446{
1447	uint8_t *acmd = cfis + 0x40;
1448	uint32_t tfd;
1449
1450	switch (acmd[4] & 3) {
1451	case 0:
1452	case 1:
1453	case 3:
1454		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1455		tfd = ATA_S_READY | ATA_S_DSC;
1456		break;
1457	case 2:
1458		/* TODO eject media */
1459		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1460		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1461		p->asc = 0x53;
1462		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1463		break;
1464	}
1465	ahci_write_fis_d2h(p, slot, cfis, tfd);
1466}
1467
1468static void
1469atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1470{
1471	uint8_t *acmd;
1472	uint32_t tfd;
1473	uint8_t pc, code;
1474	int len;
1475
1476	acmd = cfis + 0x40;
1477	len = be16dec(acmd + 7);
1478	pc = acmd[2] >> 6;
1479	code = acmd[2] & 0x3f;
1480
1481	switch (pc) {
1482	case 0:
1483		switch (code) {
1484		case MODEPAGE_RW_ERROR_RECOVERY:
1485		{
1486			uint8_t buf[16];
1487
1488			if (len > sizeof(buf))
1489				len = sizeof(buf);
1490
1491			memset(buf, 0, sizeof(buf));
1492			be16enc(buf, 16 - 2);
1493			buf[2] = 0x70;
1494			buf[8] = 0x01;
1495			buf[9] = 16 - 10;
1496			buf[11] = 0x05;
1497			write_prdt(p, slot, cfis, buf, len);
1498			tfd = ATA_S_READY | ATA_S_DSC;
1499			break;
1500		}
1501		case MODEPAGE_CD_CAPABILITIES:
1502		{
1503			uint8_t buf[30];
1504
1505			if (len > sizeof(buf))
1506				len = sizeof(buf);
1507
1508			memset(buf, 0, sizeof(buf));
1509			be16enc(buf, 30 - 2);
1510			buf[2] = 0x70;
1511			buf[8] = 0x2A;
1512			buf[9] = 30 - 10;
1513			buf[10] = 0x08;
1514			buf[12] = 0x71;
1515			be16enc(&buf[18], 2);
1516			be16enc(&buf[20], 512);
1517			write_prdt(p, slot, cfis, buf, len);
1518			tfd = ATA_S_READY | ATA_S_DSC;
1519			break;
1520		}
1521		default:
1522			goto error;
1523			break;
1524		}
1525		break;
1526	case 3:
1527		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1528		p->asc = 0x39;
1529		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1530		break;
1531error:
1532	case 1:
1533	case 2:
1534		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1535		p->asc = 0x24;
1536		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1537		break;
1538	}
1539	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1540	ahci_write_fis_d2h(p, slot, cfis, tfd);
1541}
1542
1543static void
1544atapi_get_event_status_notification(struct ahci_port *p, int slot,
1545    uint8_t *cfis)
1546{
1547	uint8_t *acmd;
1548	uint32_t tfd;
1549
1550	acmd = cfis + 0x40;
1551
1552	/* we don't support asynchronous operation */
1553	if (!(acmd[1] & 1)) {
1554		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1555		p->asc = 0x24;
1556		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1557	} else {
1558		uint8_t buf[8];
1559		int len;
1560
1561		len = be16dec(acmd + 7);
1562		if (len > sizeof(buf))
1563			len = sizeof(buf);
1564
1565		memset(buf, 0, sizeof(buf));
1566		be16enc(buf, 8 - 2);
1567		buf[2] = 0x04;
1568		buf[3] = 0x10;
1569		buf[5] = 0x02;
1570		write_prdt(p, slot, cfis, buf, len);
1571		tfd = ATA_S_READY | ATA_S_DSC;
1572	}
1573	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1574	ahci_write_fis_d2h(p, slot, cfis, tfd);
1575}
1576
1577static void
1578handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1579{
1580	uint8_t *acmd;
1581
1582	acmd = cfis + 0x40;
1583
1584#ifdef AHCI_DEBUG
1585	{
1586		int i;
1587		DPRINTF("ACMD:");
1588		for (i = 0; i < 16; i++)
1589			DPRINTF("%02x ", acmd[i]);
1590		DPRINTF("\n");
1591	}
1592#endif
1593
1594	switch (acmd[0]) {
1595	case TEST_UNIT_READY:
1596		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1597		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1598		break;
1599	case INQUIRY:
1600		atapi_inquiry(p, slot, cfis);
1601		break;
1602	case READ_CAPACITY:
1603		atapi_read_capacity(p, slot, cfis);
1604		break;
1605	case PREVENT_ALLOW:
1606		/* TODO */
1607		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1608		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1609		break;
1610	case READ_TOC:
1611		atapi_read_toc(p, slot, cfis);
1612		break;
1613	case REPORT_LUNS:
1614		atapi_report_luns(p, slot, cfis);
1615		break;
1616	case READ_10:
1617	case READ_12:
1618		atapi_read(p, slot, cfis, 0);
1619		break;
1620	case REQUEST_SENSE:
1621		atapi_request_sense(p, slot, cfis);
1622		break;
1623	case START_STOP_UNIT:
1624		atapi_start_stop_unit(p, slot, cfis);
1625		break;
1626	case MODE_SENSE_10:
1627		atapi_mode_sense(p, slot, cfis);
1628		break;
1629	case GET_EVENT_STATUS_NOTIFICATION:
1630		atapi_get_event_status_notification(p, slot, cfis);
1631		break;
1632	default:
1633		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1634		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1635		p->asc = 0x20;
1636		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1637				ATA_S_READY | ATA_S_ERROR);
1638		break;
1639	}
1640}
1641
1642static void
1643ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1644{
1645
1646	p->tfd |= ATA_S_BUSY;
1647	switch (cfis[2]) {
1648	case ATA_ATA_IDENTIFY:
1649		handle_identify(p, slot, cfis);
1650		break;
1651	case ATA_SETFEATURES:
1652	{
1653		switch (cfis[3]) {
1654		case ATA_SF_ENAB_SATA_SF:
1655			switch (cfis[12]) {
1656			case ATA_SATA_SF_AN:
1657				p->tfd = ATA_S_DSC | ATA_S_READY;
1658				break;
1659			default:
1660				p->tfd = ATA_S_ERROR | ATA_S_READY;
1661				p->tfd |= (ATA_ERROR_ABORT << 8);
1662				break;
1663			}
1664			break;
1665		case ATA_SF_ENAB_WCACHE:
1666		case ATA_SF_DIS_WCACHE:
1667		case ATA_SF_ENAB_RCACHE:
1668		case ATA_SF_DIS_RCACHE:
1669			p->tfd = ATA_S_DSC | ATA_S_READY;
1670			break;
1671		case ATA_SF_SETXFER:
1672		{
1673			switch (cfis[12] & 0xf8) {
1674			case ATA_PIO:
1675			case ATA_PIO0:
1676				break;
1677			case ATA_WDMA0:
1678			case ATA_UDMA0:
1679				p->xfermode = (cfis[12] & 0x7);
1680				break;
1681			}
1682			p->tfd = ATA_S_DSC | ATA_S_READY;
1683			break;
1684		}
1685		default:
1686			p->tfd = ATA_S_ERROR | ATA_S_READY;
1687			p->tfd |= (ATA_ERROR_ABORT << 8);
1688			break;
1689		}
1690		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1691		break;
1692	}
1693	case ATA_SET_MULTI:
1694		if (cfis[12] != 0 &&
1695			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1696			p->tfd = ATA_S_ERROR | ATA_S_READY;
1697			p->tfd |= (ATA_ERROR_ABORT << 8);
1698		} else {
1699			p->mult_sectors = cfis[12];
1700			p->tfd = ATA_S_DSC | ATA_S_READY;
1701		}
1702		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1703		break;
1704	case ATA_READ:
1705	case ATA_WRITE:
1706	case ATA_READ48:
1707	case ATA_WRITE48:
1708	case ATA_READ_MUL:
1709	case ATA_WRITE_MUL:
1710	case ATA_READ_MUL48:
1711	case ATA_WRITE_MUL48:
1712	case ATA_READ_DMA:
1713	case ATA_WRITE_DMA:
1714	case ATA_READ_DMA48:
1715	case ATA_WRITE_DMA48:
1716	case ATA_READ_FPDMA_QUEUED:
1717	case ATA_WRITE_FPDMA_QUEUED:
1718		ahci_handle_rw(p, slot, cfis, 0);
1719		break;
1720	case ATA_FLUSHCACHE:
1721	case ATA_FLUSHCACHE48:
1722		ahci_handle_flush(p, slot, cfis);
1723		break;
1724	case ATA_DATA_SET_MANAGEMENT:
1725		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1726		    cfis[13] == 0 && cfis[12] == 1) {
1727			ahci_handle_dsm_trim(p, slot, cfis, 0);
1728			break;
1729		}
1730		ahci_write_fis_d2h(p, slot, cfis,
1731		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1732		break;
1733	case ATA_SEND_FPDMA_QUEUED:
1734		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1735		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1736		    cfis[11] == 0 && cfis[3] == 1) {
1737			ahci_handle_dsm_trim(p, slot, cfis, 0);
1738			break;
1739		}
1740		ahci_write_fis_d2h(p, slot, cfis,
1741		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1742		break;
1743	case ATA_READ_LOG_EXT:
1744	case ATA_READ_LOG_DMA_EXT:
1745		ahci_handle_read_log(p, slot, cfis);
1746		break;
1747	case ATA_SECURITY_FREEZE_LOCK:
1748	case ATA_SMART_CMD:
1749	case ATA_NOP:
1750		ahci_write_fis_d2h(p, slot, cfis,
1751		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1752		break;
1753	case ATA_CHECK_POWER_MODE:
1754		cfis[12] = 0xff;	/* always on */
1755		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1756		break;
1757	case ATA_STANDBY_CMD:
1758	case ATA_STANDBY_IMMEDIATE:
1759	case ATA_IDLE_CMD:
1760	case ATA_IDLE_IMMEDIATE:
1761	case ATA_SLEEP:
1762	case ATA_READ_VERIFY:
1763	case ATA_READ_VERIFY48:
1764		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1765		break;
1766	case ATA_ATAPI_IDENTIFY:
1767		handle_atapi_identify(p, slot, cfis);
1768		break;
1769	case ATA_PACKET_CMD:
1770		if (!p->atapi) {
1771			ahci_write_fis_d2h(p, slot, cfis,
1772			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1773		} else
1774			handle_packet_cmd(p, slot, cfis);
1775		break;
1776	default:
1777		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1778		ahci_write_fis_d2h(p, slot, cfis,
1779		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1780		break;
1781	}
1782}
1783
1784static void
1785ahci_handle_slot(struct ahci_port *p, int slot)
1786{
1787	struct ahci_cmd_hdr *hdr;
1788#ifdef AHCI_DEBUG
1789	struct ahci_prdt_entry *prdt;
1790#endif
1791	struct pci_ahci_softc *sc;
1792	uint8_t *cfis;
1793#ifdef AHCI_DEBUG
1794	int cfl, i;
1795#endif
1796
1797	sc = p->pr_sc;
1798	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1799#ifdef AHCI_DEBUG
1800	cfl = (hdr->flags & 0x1f) * 4;
1801#endif
1802	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1803			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1804#ifdef AHCI_DEBUG
1805	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1806
1807	DPRINTF("\ncfis:");
1808	for (i = 0; i < cfl; i++) {
1809		if (i % 10 == 0)
1810			DPRINTF("\n");
1811		DPRINTF("%02x ", cfis[i]);
1812	}
1813	DPRINTF("\n");
1814
1815	for (i = 0; i < hdr->prdtl; i++) {
1816		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1817		prdt++;
1818	}
1819#endif
1820
1821	if (cfis[0] != FIS_TYPE_REGH2D) {
1822		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1823		return;
1824	}
1825
1826	if (cfis[1] & 0x80) {
1827		ahci_handle_cmd(p, slot, cfis);
1828	} else {
1829		if (cfis[15] & (1 << 2))
1830			p->reset = 1;
1831		else if (p->reset) {
1832			p->reset = 0;
1833			ahci_port_reset(p);
1834		}
1835		p->ci &= ~(1 << slot);
1836	}
1837}
1838
1839static void
1840ahci_handle_port(struct ahci_port *p)
1841{
1842
1843	if (!(p->cmd & AHCI_P_CMD_ST))
1844		return;
1845
1846	/*
1847	 * Search for any new commands to issue ignoring those that
1848	 * are already in-flight.  Stop if device is busy or in error.
1849	 */
1850	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1851		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1852			break;
1853		if (p->waitforclear)
1854			break;
1855		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1856			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1857			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1858			ahci_handle_slot(p, p->ccs);
1859		}
1860	}
1861}
1862
1863/*
1864 * blockif callback routine - this runs in the context of the blockif
1865 * i/o thread, so the mutex needs to be acquired.
1866 */
1867static void
1868ata_ioreq_cb(struct blockif_req *br, int err)
1869{
1870	struct ahci_cmd_hdr *hdr;
1871	struct ahci_ioreq *aior;
1872	struct ahci_port *p;
1873	struct pci_ahci_softc *sc;
1874	uint32_t tfd;
1875	uint8_t *cfis;
1876	int slot, ncq, dsm;
1877
1878	DPRINTF("%s %d\n", __func__, err);
1879
1880	ncq = dsm = 0;
1881	aior = br->br_param;
1882	p = aior->io_pr;
1883	cfis = aior->cfis;
1884	slot = aior->slot;
1885	sc = p->pr_sc;
1886	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1887
1888	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1889	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1890	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1891		ncq = 1;
1892	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1893	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1894	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1895		dsm = 1;
1896
1897	pthread_mutex_lock(&sc->mtx);
1898
1899	/*
1900	 * Delete the blockif request from the busy list
1901	 */
1902	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1903
1904	/*
1905	 * Move the blockif request back to the free list
1906	 */
1907	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1908
1909	if (!err)
1910		hdr->prdbc = aior->done;
1911
1912	if (!err && aior->more) {
1913		if (dsm)
1914			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1915		else
1916			ahci_handle_rw(p, slot, cfis, aior->done);
1917		goto out;
1918	}
1919
1920	if (!err)
1921		tfd = ATA_S_READY | ATA_S_DSC;
1922	else
1923		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1924	if (ncq)
1925		ahci_write_fis_sdb(p, slot, cfis, tfd);
1926	else
1927		ahci_write_fis_d2h(p, slot, cfis, tfd);
1928
1929	/*
1930	 * This command is now complete.
1931	 */
1932	p->pending &= ~(1 << slot);
1933
1934	ahci_check_stopped(p);
1935	ahci_handle_port(p);
1936out:
1937	pthread_mutex_unlock(&sc->mtx);
1938	DPRINTF("%s exit\n", __func__);
1939}
1940
1941static void
1942atapi_ioreq_cb(struct blockif_req *br, int err)
1943{
1944	struct ahci_cmd_hdr *hdr;
1945	struct ahci_ioreq *aior;
1946	struct ahci_port *p;
1947	struct pci_ahci_softc *sc;
1948	uint8_t *cfis;
1949	uint32_t tfd;
1950	int slot;
1951
1952	DPRINTF("%s %d\n", __func__, err);
1953
1954	aior = br->br_param;
1955	p = aior->io_pr;
1956	cfis = aior->cfis;
1957	slot = aior->slot;
1958	sc = p->pr_sc;
1959	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1960
1961	pthread_mutex_lock(&sc->mtx);
1962
1963	/*
1964	 * Delete the blockif request from the busy list
1965	 */
1966	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1967
1968	/*
1969	 * Move the blockif request back to the free list
1970	 */
1971	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1972
1973	if (!err)
1974		hdr->prdbc = aior->done;
1975
1976	if (!err && aior->more) {
1977		atapi_read(p, slot, cfis, aior->done);
1978		goto out;
1979	}
1980
1981	if (!err) {
1982		tfd = ATA_S_READY | ATA_S_DSC;
1983	} else {
1984		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1985		p->asc = 0x21;
1986		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1987	}
1988	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1989	ahci_write_fis_d2h(p, slot, cfis, tfd);
1990
1991	/*
1992	 * This command is now complete.
1993	 */
1994	p->pending &= ~(1 << slot);
1995
1996	ahci_check_stopped(p);
1997	ahci_handle_port(p);
1998out:
1999	pthread_mutex_unlock(&sc->mtx);
2000	DPRINTF("%s exit\n", __func__);
2001}
2002
2003static void
2004pci_ahci_ioreq_init(struct ahci_port *pr)
2005{
2006	struct ahci_ioreq *vr;
2007	int i;
2008
2009	pr->ioqsz = blockif_queuesz(pr->bctx);
2010	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2011	STAILQ_INIT(&pr->iofhd);
2012
2013	/*
2014	 * Add all i/o request entries to the free queue
2015	 */
2016	for (i = 0; i < pr->ioqsz; i++) {
2017		vr = &pr->ioreq[i];
2018		vr->io_pr = pr;
2019		if (!pr->atapi)
2020			vr->io_req.br_callback = ata_ioreq_cb;
2021		else
2022			vr->io_req.br_callback = atapi_ioreq_cb;
2023		vr->io_req.br_param = vr;
2024		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2025	}
2026
2027	TAILQ_INIT(&pr->iobhd);
2028}
2029
2030static void
2031pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2032{
2033	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2034	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2035	struct ahci_port *p = &sc->port[port];
2036
2037	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2038		port, offset, value);
2039
2040	switch (offset) {
2041	case AHCI_P_CLB:
2042		p->clb = value;
2043		break;
2044	case AHCI_P_CLBU:
2045		p->clbu = value;
2046		break;
2047	case AHCI_P_FB:
2048		p->fb = value;
2049		break;
2050	case AHCI_P_FBU:
2051		p->fbu = value;
2052		break;
2053	case AHCI_P_IS:
2054		p->is &= ~value;
2055		ahci_port_intr(p);
2056		break;
2057	case AHCI_P_IE:
2058		p->ie = value & 0xFDC000FF;
2059		ahci_port_intr(p);
2060		break;
2061	case AHCI_P_CMD:
2062	{
2063		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2064		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2065		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2066		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2067		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2068		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2069		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2070		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2071
2072		if (!(value & AHCI_P_CMD_ST)) {
2073			ahci_port_stop(p);
2074		} else {
2075			uint64_t clb;
2076
2077			p->cmd |= AHCI_P_CMD_CR;
2078			clb = (uint64_t)p->clbu << 32 | p->clb;
2079			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2080					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2081		}
2082
2083		if (value & AHCI_P_CMD_FRE) {
2084			uint64_t fb;
2085
2086			p->cmd |= AHCI_P_CMD_FR;
2087			fb = (uint64_t)p->fbu << 32 | p->fb;
2088			/* we don't support FBSCP, so rfis size is 256Bytes */
2089			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2090		} else {
2091			p->cmd &= ~AHCI_P_CMD_FR;
2092		}
2093
2094		if (value & AHCI_P_CMD_CLO) {
2095			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2096			p->cmd &= ~AHCI_P_CMD_CLO;
2097		}
2098
2099		if (value & AHCI_P_CMD_ICC_MASK) {
2100			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2101		}
2102
2103		ahci_handle_port(p);
2104		break;
2105	}
2106	case AHCI_P_TFD:
2107	case AHCI_P_SIG:
2108	case AHCI_P_SSTS:
2109		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
2110		break;
2111	case AHCI_P_SCTL:
2112		p->sctl = value;
2113		if (!(p->cmd & AHCI_P_CMD_ST)) {
2114			if (value & ATA_SC_DET_RESET)
2115				ahci_port_reset(p);
2116		}
2117		break;
2118	case AHCI_P_SERR:
2119		p->serr &= ~value;
2120		break;
2121	case AHCI_P_SACT:
2122		p->sact |= value;
2123		break;
2124	case AHCI_P_CI:
2125		p->ci |= value;
2126		ahci_handle_port(p);
2127		break;
2128	case AHCI_P_SNTF:
2129	case AHCI_P_FBS:
2130	default:
2131		break;
2132	}
2133}
2134
2135static void
2136pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2137{
2138	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2139		offset, value);
2140
2141	switch (offset) {
2142	case AHCI_CAP:
2143	case AHCI_PI:
2144	case AHCI_VS:
2145	case AHCI_CAP2:
2146		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2147		break;
2148	case AHCI_GHC:
2149		if (value & AHCI_GHC_HR) {
2150			ahci_reset(sc);
2151			break;
2152		}
2153		if (value & AHCI_GHC_IE)
2154			sc->ghc |= AHCI_GHC_IE;
2155		else
2156			sc->ghc &= ~AHCI_GHC_IE;
2157		ahci_generate_intr(sc, 0xffffffff);
2158		break;
2159	case AHCI_IS:
2160		sc->is &= ~value;
2161		ahci_generate_intr(sc, value);
2162		break;
2163	default:
2164		break;
2165	}
2166}
2167
2168static void
2169pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2170		int baridx, uint64_t offset, int size, uint64_t value)
2171{
2172	struct pci_ahci_softc *sc = pi->pi_arg;
2173
2174	assert(baridx == 5);
2175	assert((offset % 4) == 0 && size == 4);
2176
2177	pthread_mutex_lock(&sc->mtx);
2178
2179	if (offset < AHCI_OFFSET)
2180		pci_ahci_host_write(sc, offset, value);
2181	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2182		pci_ahci_port_write(sc, offset, value);
2183	else
2184		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2185
2186	pthread_mutex_unlock(&sc->mtx);
2187}
2188
2189static uint64_t
2190pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2191{
2192	uint32_t value;
2193
2194	switch (offset) {
2195	case AHCI_CAP:
2196	case AHCI_GHC:
2197	case AHCI_IS:
2198	case AHCI_PI:
2199	case AHCI_VS:
2200	case AHCI_CCCC:
2201	case AHCI_CCCP:
2202	case AHCI_EM_LOC:
2203	case AHCI_EM_CTL:
2204	case AHCI_CAP2:
2205	{
2206		uint32_t *p = &sc->cap;
2207		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2208		value = *p;
2209		break;
2210	}
2211	default:
2212		value = 0;
2213		break;
2214	}
2215	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2216		offset, value);
2217
2218	return (value);
2219}
2220
2221static uint64_t
2222pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2223{
2224	uint32_t value;
2225	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2226	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2227
2228	switch (offset) {
2229	case AHCI_P_CLB:
2230	case AHCI_P_CLBU:
2231	case AHCI_P_FB:
2232	case AHCI_P_FBU:
2233	case AHCI_P_IS:
2234	case AHCI_P_IE:
2235	case AHCI_P_CMD:
2236	case AHCI_P_TFD:
2237	case AHCI_P_SIG:
2238	case AHCI_P_SSTS:
2239	case AHCI_P_SCTL:
2240	case AHCI_P_SERR:
2241	case AHCI_P_SACT:
2242	case AHCI_P_CI:
2243	case AHCI_P_SNTF:
2244	case AHCI_P_FBS:
2245	{
2246		uint32_t *p= &sc->port[port].clb;
2247		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2248		value = *p;
2249		break;
2250	}
2251	default:
2252		value = 0;
2253		break;
2254	}
2255
2256	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2257		port, offset, value);
2258
2259	return value;
2260}
2261
2262static uint64_t
2263pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2264    uint64_t regoff, int size)
2265{
2266	struct pci_ahci_softc *sc = pi->pi_arg;
2267	uint64_t offset;
2268	uint32_t value;
2269
2270	assert(baridx == 5);
2271	assert(size == 1 || size == 2 || size == 4);
2272	assert((regoff & (size - 1)) == 0);
2273
2274	pthread_mutex_lock(&sc->mtx);
2275
2276	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2277	if (offset < AHCI_OFFSET)
2278		value = pci_ahci_host_read(sc, offset);
2279	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2280		value = pci_ahci_port_read(sc, offset);
2281	else {
2282		value = 0;
2283		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
2284		    regoff);
2285	}
2286	value >>= 8 * (regoff & 0x3);
2287
2288	pthread_mutex_unlock(&sc->mtx);
2289
2290	return (value);
2291}
2292
2293static int
2294pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2295{
2296	char bident[sizeof("XX:XX:XX")];
2297	struct blockif_ctxt *bctxt;
2298	struct pci_ahci_softc *sc;
2299	int ret, slots, p;
2300	MD5_CTX mdctx;
2301	u_char digest[16];
2302	char *next, *next2;
2303
2304	ret = 0;
2305
2306#ifdef AHCI_DEBUG
2307	dbg = fopen("/tmp/log", "w+");
2308#endif
2309
2310	sc = calloc(1, sizeof(struct pci_ahci_softc));
2311	pi->pi_arg = sc;
2312	sc->asc_pi = pi;
2313	pthread_mutex_init(&sc->mtx, NULL);
2314	sc->ports = 0;
2315	sc->pi = 0;
2316	slots = 32;
2317
2318	for (p = 0; p < MAX_PORTS && opts != NULL; p++, opts = next) {
2319		/* Identify and cut off type of present port. */
2320		if (strncmp(opts, "hd:", 3) == 0) {
2321			atapi = 0;
2322			opts += 3;
2323		} else if (strncmp(opts, "cd:", 3) == 0) {
2324			atapi = 1;
2325			opts += 3;
2326		}
2327
2328		/* Find and cut off the next port options. */
2329		next = strstr(opts, ",hd:");
2330		next2 = strstr(opts, ",cd:");
2331		if (next == NULL || (next2 != NULL && next2 < next))
2332			next = next2;
2333		if (next != NULL) {
2334			next[0] = 0;
2335			next++;
2336		}
2337
2338		if (opts[0] == 0)
2339			continue;
2340
2341		/*
2342		 * Attempt to open the backing image. Use the PCI slot/func
2343		 * and the port number for the identifier string.
2344		 */
2345		snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
2346		    pi->pi_func, p);
2347		bctxt = blockif_open(opts, bident);
2348		if (bctxt == NULL) {
2349			sc->ports = p;
2350			ret = 1;
2351			goto open_fail;
2352		}
2353		sc->port[p].bctx = bctxt;
2354		sc->port[p].pr_sc = sc;
2355		sc->port[p].port = p;
2356		sc->port[p].atapi = atapi;
2357
2358		/*
2359		 * Create an identifier for the backing file.
2360		 * Use parts of the md5 sum of the filename
2361		 */
2362		MD5Init(&mdctx);
2363		MD5Update(&mdctx, opts, strlen(opts));
2364		MD5Final(digest, &mdctx);
2365		sprintf(sc->port[p].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2366		    digest[0], digest[1], digest[2], digest[3], digest[4],
2367		    digest[5]);
2368
2369		/*
2370		 * Allocate blockif request structures and add them
2371		 * to the free list
2372		 */
2373		pci_ahci_ioreq_init(&sc->port[p]);
2374
2375		sc->pi |= (1 << p);
2376		if (sc->port[p].ioqsz < slots)
2377			slots = sc->port[p].ioqsz;
2378	}
2379	sc->ports = p;
2380
2381	/* Intel ICH8 AHCI */
2382	--slots;
2383	if (sc->ports < DEF_PORTS)
2384		sc->ports = DEF_PORTS;
2385	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2386	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2387	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2388	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2389	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2390
2391	sc->vs = 0x10300;
2392	sc->cap2 = AHCI_CAP2_APST;
2393	ahci_reset(sc);
2394
2395	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2396	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2397	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2398	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2399	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2400	p = MIN(sc->ports, 16);
2401	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2402	pci_emul_add_msicap(pi, 1 << p);
2403	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2404	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2405
2406	pci_lintr_request(pi);
2407
2408open_fail:
2409	if (ret) {
2410		for (p = 0; p < sc->ports; p++) {
2411			if (sc->port[p].bctx != NULL)
2412				blockif_close(sc->port[p].bctx);
2413		}
2414		free(sc);
2415	}
2416
2417	return (ret);
2418}
2419
/*
 * Init entry point for the emulations whose ports are hard disks unless
 * the option string says otherwise.  Returns whatever pci_ahci_init()
 * returns.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2426
/*
 * Init entry point for the emulation whose ports are ATAPI devices
 * unless the option string says otherwise.  Returns whatever
 * pci_ahci_init() returns.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, atapi));
}
2433
2434/*
2435 * Use separate emulation names to distinguish drive and atapi devices
2436 */
2437struct pci_devemu pci_de_ahci = {
2438	.pe_emu =	"ahci",
2439	.pe_init =	pci_ahci_hd_init,
2440	.pe_barwrite =	pci_ahci_write,
2441	.pe_barread =	pci_ahci_read
2442};
2443PCI_EMUL_SET(pci_de_ahci);
2444
2445struct pci_devemu pci_de_ahci_hd = {
2446	.pe_emu =	"ahci-hd",
2447	.pe_init =	pci_ahci_hd_init,
2448	.pe_barwrite =	pci_ahci_write,
2449	.pe_barread =	pci_ahci_read
2450};
2451PCI_EMUL_SET(pci_de_ahci_hd);
2452
2453struct pci_devemu pci_de_ahci_cd = {
2454	.pe_emu =	"ahci-cd",
2455	.pe_init =	pci_ahci_atapi_init,
2456	.pe_barwrite =	pci_ahci_write,
2457	.pe_barread =	pci_ahci_read
2458};
2459PCI_EMUL_SET(pci_de_ahci_cd);
2460