1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
5 * Copyright (c) 2015-2016 Alexander Motin <mav@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/param.h>
31#include <sys/linker_set.h>
32#include <sys/stat.h>
33#include <sys/uio.h>
34#include <sys/ioctl.h>
35#include <sys/disk.h>
36#include <sys/ata.h>
37#include <sys/endian.h>
38
39#include <errno.h>
40#include <fcntl.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <stdint.h>
44#include <string.h>
45#include <strings.h>
46#include <unistd.h>
47#include <assert.h>
48#include <pthread.h>
49#include <pthread_np.h>
50#include <inttypes.h>
51#include <md5.h>
52
53#include "bhyverun.h"
54#include "config.h"
55#include "debug.h"
56#include "pci_emul.h"
57#ifdef BHYVE_SNAPSHOT
58#include "snapshot.h"
59#endif
60#include "ahci.h"
61#include "block_if.h"
62
/* Port count limits. */
#define	DEF_PORTS	6		/* Intel ICH8 AHCI exposes 6 ports */
#define	MAX_PORTS	32		/* the AHCI limit is 32 ports */

/* Device signatures loaded into a port's 'sig' field on reset. */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
68
/*
 * SATA FIS type codes.  The code is what gets stored in byte 0 of a FIS
 * built by the ahci_write_*fis* helpers.
 */
enum sata_fis_type {
	/* Register FIS - host to device */
	FIS_TYPE_REGH2D		= 0x27,
	/* Register FIS - device to host */
	FIS_TYPE_REGD2H		= 0x34,
	/* DMA activate FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,
	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DMASETUP	= 0x41,
	/* Data FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,
	/* BIST activate FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,
	/* PIO setup FIS - device to host */
	FIS_TYPE_PIOSETUP	= 0x5F,
	/* Set device bits FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,
};
79
/*
 * SCSI command operation codes (listed in ascending numeric order).
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	REPORT_LUNS			0xA0
#define	READ_12				0xA8
#define	READ_CD				0xBE
97
/*
 * SCSI mode page codes.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01	/* read/write error recovery */
#define	MODEPAGE_CD_CAPABILITIES	0x2A	/* CD capabilities */
103
/*
 * ATA command-related codes defined locally.
 *
 * NOTE(review): judging by the names these look like SET FEATURES
 * sub-codes (enable/disable a SATA feature) plus the feature id for
 * asynchronous notification -- confirm against the code that uses them.
 */
#define	ATA_SF_ENAB_SATA_SF	0x10
#define	ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF	0x90
110
/*
 * Debug printf.
 *
 * With AHCI_DEBUG defined, DPRINTF() formats its arguments to the 'dbg'
 * stream and flushes it right away; otherwise it expands to nothing and
 * its arguments are never evaluated.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)						\
	do {								\
		fprintf(dbg, format, ##arg);				\
		fflush(dbg);						\
	} while (0)
#else
#define DPRINTF(format, arg...)
#endif
120
/*
 * Size constant for a port identifier string.
 * NOTE(review): presumably 20 characters plus a terminating NUL -- confirm
 * against the users of this macro.  The expansion is parenthesized so it
 * keeps its value inside larger expressions such as '2 * AHCI_PORT_IDENT'.
 */
#define AHCI_PORT_IDENT (20 + 1)
122
123struct ahci_ioreq {
124	struct blockif_req io_req;
125	struct ahci_port *io_pr;
126	STAILQ_ENTRY(ahci_ioreq) io_flist;
127	TAILQ_ENTRY(ahci_ioreq) io_blist;
128	uint8_t *cfis;
129	uint32_t len;
130	uint32_t done;
131	int slot;
132	int more;
133	int readop;
134};
135
136struct ahci_port {
137	struct blockif_ctxt *bctx;
138	struct pci_ahci_softc *pr_sc;
139	struct ata_params ata_ident;
140	uint8_t *cmd_lst;
141	uint8_t *rfis;
142	int port;
143	int atapi;
144	int reset;
145	int waitforclear;
146	int mult_sectors;
147	uint8_t xfermode;
148	uint8_t err_cfis[20];
149	uint8_t sense_key;
150	uint8_t asc;
151	u_int ccs;
152	uint32_t pending;
153
154	uint32_t clb;
155	uint32_t clbu;
156	uint32_t fb;
157	uint32_t fbu;
158	uint32_t is;
159	uint32_t ie;
160	uint32_t cmd;
161	uint32_t unused0;
162	uint32_t tfd;
163	uint32_t sig;
164	uint32_t ssts;
165	uint32_t sctl;
166	uint32_t serr;
167	uint32_t sact;
168	uint32_t ci;
169	uint32_t sntf;
170	uint32_t fbs;
171
172	/*
173	 * i/o request info
174	 */
175	struct ahci_ioreq *ioreq;
176	int ioqsz;
177	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
178	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
179};
180
/*
 * Command header: one entry of a port's command list.  The code in this
 * file reads 'prdtl' (number of PRDT entries) and writes 'prdbc' (bytes
 * transferred).
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* PRDT length, in entries */
	uint32_t prdbc;		/* PRD byte count */
	uint64_t ctba;
	uint32_t reserved[4];
};
188
/* Bits of 'dbc' holding the byte count; the stored value is count - 1. */
#define	DBCMASK		0x3fffff

/*
 * One physical region descriptor: a guest-physical buffer address and its
 * length.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the buffer */
	uint32_t reserved;
	uint32_t dbc;		/* (byte count - 1) in the DBCMASK bits */
};
195
196struct pci_ahci_softc {
197	struct pci_devinst *asc_pi;
198	pthread_mutex_t	mtx;
199	int ports;
200	uint32_t cap;
201	uint32_t ghc;
202	uint32_t is;
203	uint32_t pi;
204	uint32_t vs;
205	uint32_t ccc_ctl;
206	uint32_t ccc_pts;
207	uint32_t em_loc;
208	uint32_t em_ctl;
209	uint32_t cap2;
210	uint32_t bohc;
211	uint32_t lintr;
212	struct ahci_port port[MAX_PORTS];
213};
214#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
215
216static void ahci_handle_port(struct ahci_port *p);
217
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * buf receives three bytes: minutes, seconds, frames.  The conversion uses
 * 75 frames per second and biases the address by 150 frames first.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
225
226/*
227 * Generate HBA interrupts on global IS register write.
228 */
229static void
230ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask)
231{
232	struct pci_devinst *pi = sc->asc_pi;
233	struct ahci_port *p;
234	int i, nmsg;
235	uint32_t mmask;
236
237	/* Update global IS from PxIS/PxIE. */
238	for (i = 0; i < sc->ports; i++) {
239		p = &sc->port[i];
240		if (p->is & p->ie)
241			sc->is |= (1 << i);
242	}
243	DPRINTF("%s(%08x) %08x", __func__, mask, sc->is);
244
245	/* If there is nothing enabled -- clear legacy interrupt and exit. */
246	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
247		if (sc->lintr) {
248			pci_lintr_deassert(pi);
249			sc->lintr = 0;
250		}
251		return;
252	}
253
254	/* If there is anything and no MSI -- assert legacy interrupt. */
255	nmsg = pci_msi_maxmsgnum(pi);
256	if (nmsg == 0) {
257		if (!sc->lintr) {
258			sc->lintr = 1;
259			pci_lintr_assert(pi);
260		}
261		return;
262	}
263
264	/* Assert respective MSIs for ports that were touched. */
265	for (i = 0; i < nmsg; i++) {
266		if (sc->ports <= nmsg || i < nmsg - 1)
267			mmask = 1 << i;
268		else
269			mmask = 0xffffffff << i;
270		if (sc->is & mask && mmask & mask)
271			pci_generate_msi(pi, i);
272	}
273}
274
275/*
276 * Generate HBA interrupt on specific port event.
277 */
278static void
279ahci_port_intr(struct ahci_port *p)
280{
281	struct pci_ahci_softc *sc = p->pr_sc;
282	struct pci_devinst *pi = sc->asc_pi;
283	int nmsg;
284
285	DPRINTF("%s(%d) %08x/%08x %08x", __func__,
286	    p->port, p->is, p->ie, sc->is);
287
288	/* If there is nothing enabled -- we are done. */
289	if ((p->is & p->ie) == 0)
290		return;
291
292	/* In case of non-shared MSI always generate interrupt. */
293	nmsg = pci_msi_maxmsgnum(pi);
294	if (sc->ports <= nmsg || p->port < nmsg - 1) {
295		sc->is |= (1 << p->port);
296		if ((sc->ghc & AHCI_GHC_IE) == 0)
297			return;
298		pci_generate_msi(pi, p->port);
299		return;
300	}
301
302	/* If IS for this port is already set -- do nothing. */
303	if (sc->is & (1 << p->port))
304		return;
305
306	sc->is |= (1 << p->port);
307
308	/* If interrupts are enabled -- generate one. */
309	if ((sc->ghc & AHCI_GHC_IE) == 0)
310		return;
311	if (nmsg > 0) {
312		pci_generate_msi(pi, nmsg - 1);
313	} else if (!sc->lintr) {
314		sc->lintr = 1;
315		pci_lintr_assert(pi);
316	}
317}
318
319static void
320ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
321{
322	int offset, len, irq;
323
324	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
325		return;
326
327	switch (ft) {
328	case FIS_TYPE_REGD2H:
329		offset = 0x40;
330		len = 20;
331		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
332		break;
333	case FIS_TYPE_SETDEVBITS:
334		offset = 0x58;
335		len = 8;
336		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
337		break;
338	case FIS_TYPE_PIOSETUP:
339		offset = 0x20;
340		len = 20;
341		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
342		break;
343	default:
344		EPRINTLN("unsupported fis type %d", ft);
345		return;
346	}
347	if (fis[2] & ATA_S_ERROR) {
348		p->waitforclear = 1;
349		irq |= AHCI_P_IX_TFE;
350	}
351	memcpy(p->rfis + offset, fis, len);
352	if (irq) {
353		if (~p->is & irq) {
354			p->is |= irq;
355			ahci_port_intr(p);
356		}
357	}
358}
359
360static void
361ahci_write_fis_piosetup(struct ahci_port *p)
362{
363	uint8_t fis[20];
364
365	memset(fis, 0, sizeof(fis));
366	fis[0] = FIS_TYPE_PIOSETUP;
367	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
368}
369
370static void
371ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
372{
373	uint8_t fis[8];
374	uint8_t error;
375
376	error = (tfd >> 8) & 0xff;
377	tfd &= 0x77;
378	memset(fis, 0, sizeof(fis));
379	fis[0] = FIS_TYPE_SETDEVBITS;
380	fis[1] = (1 << 6);
381	fis[2] = tfd;
382	fis[3] = error;
383	if (fis[2] & ATA_S_ERROR) {
384		p->err_cfis[0] = slot;
385		p->err_cfis[2] = tfd;
386		p->err_cfis[3] = error;
387		memcpy(&p->err_cfis[4], cfis + 4, 16);
388	} else {
389		*(uint32_t *)(fis + 4) = (1 << slot);
390		p->sact &= ~(1 << slot);
391	}
392	p->tfd &= ~0x77;
393	p->tfd |= tfd;
394	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
395}
396
397static void
398ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
399{
400	uint8_t fis[20];
401	uint8_t error;
402
403	error = (tfd >> 8) & 0xff;
404	memset(fis, 0, sizeof(fis));
405	fis[0] = FIS_TYPE_REGD2H;
406	fis[1] = (1 << 6);
407	fis[2] = tfd & 0xff;
408	fis[3] = error;
409	fis[4] = cfis[4];
410	fis[5] = cfis[5];
411	fis[6] = cfis[6];
412	fis[7] = cfis[7];
413	fis[8] = cfis[8];
414	fis[9] = cfis[9];
415	fis[10] = cfis[10];
416	fis[11] = cfis[11];
417	fis[12] = cfis[12];
418	fis[13] = cfis[13];
419	if (fis[2] & ATA_S_ERROR) {
420		p->err_cfis[0] = 0x80;
421		p->err_cfis[2] = tfd & 0xff;
422		p->err_cfis[3] = error;
423		memcpy(&p->err_cfis[4], cfis + 4, 16);
424	} else
425		p->ci &= ~(1 << slot);
426	p->tfd = tfd;
427	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
428}
429
430static void
431ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
432{
433	uint8_t fis[20];
434
435	p->tfd = ATA_S_READY | ATA_S_DSC;
436	memset(fis, 0, sizeof(fis));
437	fis[0] = FIS_TYPE_REGD2H;
438	fis[1] = 0;			/* No interrupt */
439	fis[2] = p->tfd;		/* Status */
440	fis[3] = 0;			/* No error */
441	p->ci &= ~(1 << slot);
442	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
443}
444
445static void
446ahci_write_reset_fis_d2h(struct ahci_port *p)
447{
448	uint8_t fis[20];
449
450	memset(fis, 0, sizeof(fis));
451	fis[0] = FIS_TYPE_REGD2H;
452	fis[3] = 1;
453	fis[4] = 1;
454	if (p->atapi) {
455		fis[5] = 0x14;
456		fis[6] = 0xeb;
457	}
458	fis[12] = 1;
459	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
460}
461
462static void
463ahci_check_stopped(struct ahci_port *p)
464{
465	/*
466	 * If we are no longer processing the command list and nothing
467	 * is in-flight, clear the running bit, the current command
468	 * slot, the command issue and active bits.
469	 */
470	if (!(p->cmd & AHCI_P_CMD_ST)) {
471		if (p->pending == 0) {
472			p->ccs = 0;
473			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
474			p->ci = 0;
475			p->sact = 0;
476			p->waitforclear = 0;
477		}
478	}
479}
480
481static void
482ahci_port_stop(struct ahci_port *p)
483{
484	struct ahci_ioreq *aior;
485	uint8_t *cfis;
486	int slot;
487	int error;
488
489	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
490
491	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
492		/*
493		 * Try to cancel the outstanding blockif request.
494		 */
495		error = blockif_cancel(p->bctx, &aior->io_req);
496		if (error != 0)
497			continue;
498
499		slot = aior->slot;
500		cfis = aior->cfis;
501		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
502		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
503		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
504			p->sact &= ~(1 << slot);	/* NCQ */
505		else
506			p->ci &= ~(1 << slot);
507
508		/*
509		 * This command is now done.
510		 */
511		p->pending &= ~(1 << slot);
512
513		/*
514		 * Delete the blockif request from the busy list
515		 */
516		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
517
518		/*
519		 * Move the blockif request back to the free list
520		 */
521		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
522	}
523
524	ahci_check_stopped(p);
525}
526
527static void
528ahci_port_reset(struct ahci_port *pr)
529{
530	pr->serr = 0;
531	pr->sact = 0;
532	pr->xfermode = ATA_UDMA6;
533	pr->mult_sectors = 128;
534
535	if (!pr->bctx) {
536		pr->ssts = ATA_SS_DET_NO_DEVICE;
537		pr->sig = 0xFFFFFFFF;
538		pr->tfd = 0x7F;
539		return;
540	}
541	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
542	if (pr->sctl & ATA_SC_SPD_MASK)
543		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
544	else
545		pr->ssts |= ATA_SS_SPD_GEN3;
546	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
547	if (!pr->atapi) {
548		pr->sig = PxSIG_ATA;
549		pr->tfd |= ATA_S_READY;
550	} else
551		pr->sig = PxSIG_ATAPI;
552	ahci_write_reset_fis_d2h(pr);
553}
554
555static void
556ahci_reset(struct pci_ahci_softc *sc)
557{
558	int i;
559
560	sc->ghc = AHCI_GHC_AE;
561	sc->is = 0;
562
563	if (sc->lintr) {
564		pci_lintr_deassert(sc->asc_pi);
565		sc->lintr = 0;
566	}
567
568	for (i = 0; i < sc->ports; i++) {
569		sc->port[i].ie = 0;
570		sc->port[i].is = 0;
571		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
572		if (sc->port[i].bctx)
573			sc->port[i].cmd |= AHCI_P_CMD_CPS;
574		sc->port[i].sctl = 0;
575		ahci_port_reset(&sc->port[i]);
576	}
577}
578
/*
 * Store 'src' into a 'len'-byte field with each pair of adjacent bytes
 * swapped, padding with spaces once 'src' runs out.  No NUL terminator is
 * written.
 *
 * Because of the pair swap (index ^ 1) an odd 'len' would touch the byte
 * just past the field.
 * NOTE(review): callers presumably always pass an even length -- confirm.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
591
/*
 * Copy 'src' into a 'len'-byte field, padding with spaces once 'src' runs
 * out.  Over-long input is truncated and no NUL terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos] = (*src != '\0') ? *src++ : ' ';
}
604
605/*
606 * Build up the iovec based on the PRDT, 'done' and 'len'.
607 */
608static void
609ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
610    struct ahci_prdt_entry *prdt, uint16_t prdtl)
611{
612	struct blockif_req *breq = &aior->io_req;
613	uint32_t dbcsz, extra, left, skip, todo;
614	int i, j;
615
616	assert(aior->len >= aior->done);
617
618	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
619	skip = aior->done;
620	left = aior->len - aior->done;
621	todo = 0;
622	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
623	    i++, prdt++) {
624		dbcsz = (prdt->dbc & DBCMASK) + 1;
625		/* Skip already done part of the PRDT */
626		if (dbcsz <= skip) {
627			skip -= dbcsz;
628			continue;
629		}
630		dbcsz -= skip;
631		if (dbcsz > left)
632			dbcsz = left;
633		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
634		    prdt->dba + skip, dbcsz);
635		breq->br_iov[j].iov_len = dbcsz;
636		todo += dbcsz;
637		left -= dbcsz;
638		skip = 0;
639		j++;
640	}
641
642	/* If we got limited by IOV length, round I/O down to sector size. */
643	if (j == BLOCKIF_IOV_MAX) {
644		extra = todo % blockif_sectsz(p->bctx);
645		todo -= extra;
646		assert(todo > 0);
647		while (extra > 0) {
648			if (breq->br_iov[j - 1].iov_len > extra) {
649				breq->br_iov[j - 1].iov_len -= extra;
650				break;
651			}
652			extra -= breq->br_iov[j - 1].iov_len;
653			j--;
654		}
655	}
656
657	breq->br_iovcnt = j;
658	breq->br_resid = todo;
659	aior->done += todo;
660	aior->more = (aior->done < aior->len && i < prdtl);
661}
662
663static void
664ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
665{
666	struct ahci_ioreq *aior;
667	struct blockif_req *breq;
668	struct ahci_prdt_entry *prdt;
669	struct ahci_cmd_hdr *hdr;
670	uint64_t lba;
671	uint32_t len;
672	int err, first, ncq, readop;
673
674	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
675	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
676	ncq = 0;
677	readop = 1;
678	first = (done == 0);
679
680	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
681	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
682	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
683	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
684		readop = 0;
685
686	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
687	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
688		lba = ((uint64_t)cfis[10] << 40) |
689			((uint64_t)cfis[9] << 32) |
690			((uint64_t)cfis[8] << 24) |
691			((uint64_t)cfis[6] << 16) |
692			((uint64_t)cfis[5] << 8) |
693			cfis[4];
694		len = cfis[11] << 8 | cfis[3];
695		if (!len)
696			len = 65536;
697		ncq = 1;
698	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
699	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
700	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
701		lba = ((uint64_t)cfis[10] << 40) |
702			((uint64_t)cfis[9] << 32) |
703			((uint64_t)cfis[8] << 24) |
704			((uint64_t)cfis[6] << 16) |
705			((uint64_t)cfis[5] << 8) |
706			cfis[4];
707		len = cfis[13] << 8 | cfis[12];
708		if (!len)
709			len = 65536;
710	} else {
711		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
712			(cfis[5] << 8) | cfis[4];
713		len = cfis[12];
714		if (!len)
715			len = 256;
716	}
717	lba *= blockif_sectsz(p->bctx);
718	len *= blockif_sectsz(p->bctx);
719
720	/* Pull request off free list */
721	aior = STAILQ_FIRST(&p->iofhd);
722	assert(aior != NULL);
723	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
724
725	aior->cfis = cfis;
726	aior->slot = slot;
727	aior->len = len;
728	aior->done = done;
729	aior->readop = readop;
730	breq = &aior->io_req;
731	breq->br_offset = lba + done;
732	ahci_build_iov(p, aior, prdt, hdr->prdtl);
733
734	/* Mark this command in-flight. */
735	p->pending |= 1 << slot;
736
737	/* Stuff request onto busy list. */
738	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
739
740	if (ncq && first)
741		ahci_write_fis_d2h_ncq(p, slot);
742
743	if (readop)
744		err = blockif_read(p->bctx, breq);
745	else
746		err = blockif_write(p->bctx, breq);
747	assert(err == 0);
748}
749
750static void
751ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
752{
753	struct ahci_ioreq *aior;
754	struct blockif_req *breq;
755	int err;
756
757	/*
758	 * Pull request off free list
759	 */
760	aior = STAILQ_FIRST(&p->iofhd);
761	assert(aior != NULL);
762	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
763	aior->cfis = cfis;
764	aior->slot = slot;
765	aior->len = 0;
766	aior->done = 0;
767	aior->more = 0;
768	breq = &aior->io_req;
769
770	/*
771	 * Mark this command in-flight.
772	 */
773	p->pending |= 1 << slot;
774
775	/*
776	 * Stuff request onto busy list
777	 */
778	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
779
780	err = blockif_flush(p->bctx, breq);
781	assert(err == 0);
782}
783
784static inline void
785read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
786    unsigned int size)
787{
788	struct ahci_cmd_hdr *hdr;
789	struct ahci_prdt_entry *prdt;
790	uint8_t *to;
791	unsigned int len;
792	int i;
793
794	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
795	len = size;
796	to = buf;
797	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
798	for (i = 0; i < hdr->prdtl && len; i++) {
799		uint8_t *ptr;
800		uint32_t dbcsz;
801		unsigned int sublen;
802
803		dbcsz = (prdt->dbc & DBCMASK) + 1;
804		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
805		sublen = MIN(len, dbcsz);
806		memcpy(to, ptr, sublen);
807		len -= sublen;
808		to += sublen;
809		prdt++;
810	}
811}
812
813static void
814ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
815{
816	struct ahci_ioreq *aior;
817	struct blockif_req *breq;
818	uint8_t *entry;
819	uint64_t elba;
820	uint32_t len, elen;
821	int err, first, ncq;
822	uint8_t buf[512];
823
824	first = (done == 0);
825	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
826		len = (uint16_t)cfis[13] << 8 | cfis[12];
827		len *= 512;
828		ncq = 0;
829	} else { /* ATA_SEND_FPDMA_QUEUED */
830		len = (uint16_t)cfis[11] << 8 | cfis[3];
831		len *= 512;
832		ncq = 1;
833	}
834	read_prdt(p, slot, cfis, buf, sizeof(buf));
835
836next:
837	entry = &buf[done];
838	elba = ((uint64_t)entry[5] << 40) |
839		((uint64_t)entry[4] << 32) |
840		((uint64_t)entry[3] << 24) |
841		((uint64_t)entry[2] << 16) |
842		((uint64_t)entry[1] << 8) |
843		entry[0];
844	elen = (uint16_t)entry[7] << 8 | entry[6];
845	done += 8;
846	if (elen == 0) {
847		if (done >= len) {
848			if (ncq) {
849				if (first)
850					ahci_write_fis_d2h_ncq(p, slot);
851				ahci_write_fis_sdb(p, slot, cfis,
852				    ATA_S_READY | ATA_S_DSC);
853			} else {
854				ahci_write_fis_d2h(p, slot, cfis,
855				    ATA_S_READY | ATA_S_DSC);
856			}
857			p->pending &= ~(1 << slot);
858			ahci_check_stopped(p);
859			if (!first)
860				ahci_handle_port(p);
861			return;
862		}
863		goto next;
864	}
865
866	/*
867	 * Pull request off free list
868	 */
869	aior = STAILQ_FIRST(&p->iofhd);
870	assert(aior != NULL);
871	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
872	aior->cfis = cfis;
873	aior->slot = slot;
874	aior->len = len;
875	aior->done = done;
876	aior->more = (len != done);
877
878	breq = &aior->io_req;
879	breq->br_offset = elba * blockif_sectsz(p->bctx);
880	breq->br_resid = elen * blockif_sectsz(p->bctx);
881
882	/*
883	 * Mark this command in-flight.
884	 */
885	p->pending |= 1 << slot;
886
887	/*
888	 * Stuff request onto busy list
889	 */
890	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
891
892	if (ncq && first)
893		ahci_write_fis_d2h_ncq(p, slot);
894
895	err = blockif_delete(p->bctx, breq);
896	assert(err == 0);
897}
898
899static inline void
900write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf,
901    unsigned int size)
902{
903	struct ahci_cmd_hdr *hdr;
904	struct ahci_prdt_entry *prdt;
905	uint8_t *from;
906	unsigned int len;
907	int i;
908
909	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
910	len = size;
911	from = buf;
912	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
913	for (i = 0; i < hdr->prdtl && len; i++) {
914		uint8_t *ptr;
915		uint32_t dbcsz;
916		int sublen;
917
918		dbcsz = (prdt->dbc & DBCMASK) + 1;
919		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
920		sublen = MIN(len, dbcsz);
921		memcpy(ptr, from, sublen);
922		len -= sublen;
923		from += sublen;
924		prdt++;
925	}
926	hdr->prdbc = size - len;
927}
928
/*
 * Store a checksum in the last byte of 'buf' such that all 'size' bytes
 * add up to zero modulo 256.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	const int last = size - 1;
	uint8_t total = 0;
	int pos = 0;

	while (pos < last)
		total += buf[pos++];
	buf[last] = 0x100 - total;
}
939
940static void
941ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
942{
943	struct ahci_cmd_hdr *hdr;
944	uint32_t buf[128];
945	uint8_t *buf8 = (uint8_t *)buf;
946	uint16_t *buf16 = (uint16_t *)buf;
947
948	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
949	if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 ||
950	    cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
951		ahci_write_fis_d2h(p, slot, cfis,
952		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
953		return;
954	}
955
956	memset(buf, 0, sizeof(buf));
957	if (cfis[4] == 0x00) {	/* Log directory */
958		buf16[0x00] = 1; /* Version -- 1 */
959		buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */
960		buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */
961	} else if (cfis[4] == 0x10) {	/* NCQ Command Error Log */
962		memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
963		ahci_checksum(buf8, sizeof(buf));
964	} else if (cfis[4] == 0x13) {	/* SATA NCQ Send and Receive Log */
965		if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
966			buf[0x00] = 1;	/* SFQ DSM supported */
967			buf[0x01] = 1;	/* SFQ DSM TRIM supported */
968		}
969	} else {
970		ahci_write_fis_d2h(p, slot, cfis,
971		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
972		return;
973	}
974
975	if (cfis[2] == ATA_READ_LOG_EXT)
976		ahci_write_fis_piosetup(p);
977	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
978	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
979}
980
981static void
982handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
983{
984	struct ahci_cmd_hdr *hdr;
985
986	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
987	if (p->atapi || hdr->prdtl == 0) {
988		ahci_write_fis_d2h(p, slot, cfis,
989		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
990	} else {
991		ahci_write_fis_piosetup(p);
992		write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params));
993		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
994	}
995}
996
997static void
998ata_identify_init(struct ahci_port* p, int atapi)
999{
1000	struct ata_params* ata_ident = &p->ata_ident;
1001
1002	if (atapi) {
1003		ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM |
1004		    ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST;
1005		ata_ident->capabilities1 = ATA_SUPPORT_LBA |
1006			ATA_SUPPORT_DMA;
1007		ata_ident->capabilities2 = (1 << 14 | 1);
1008		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1009		ata_ident->obsolete62 = 0x3f;
1010		ata_ident->mwdmamodes = 7;
1011		if (p->xfermode & ATA_WDMA0)
1012			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1013		ata_ident->apiomodes = 3;
1014		ata_ident->mwdmamin = 0x0078;
1015		ata_ident->mwdmarec = 0x0078;
1016		ata_ident->pioblind = 0x0078;
1017		ata_ident->pioiordy = 0x0078;
1018		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1019		ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1020		ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM;
1021		ata_ident->version_major = 0x3f0;
1022		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1023			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1024		ata_ident->support.command2 = (1 << 14);
1025		ata_ident->support.extension = (1 << 14);
1026		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1027			ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1028		ata_ident->enabled.extension = (1 << 14);
1029		ata_ident->udmamodes = 0x7f;
1030		if (p->xfermode & ATA_UDMA0)
1031			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1032		ata_ident->transport_major = 0x1020;
1033		ata_ident->integrity = 0x00a5;
1034	} else {
1035		uint64_t sectors;
1036		int sectsz, psectsz, psectoff, candelete, ro;
1037		uint16_t cyl;
1038		uint8_t sech, heads;
1039
1040		ro = blockif_is_ro(p->bctx);
1041		candelete = blockif_candelete(p->bctx);
1042		sectsz = blockif_sectsz(p->bctx);
1043		sectors = blockif_size(p->bctx) / sectsz;
1044		blockif_chs(p->bctx, &cyl, &heads, &sech);
1045		blockif_psectsz(p->bctx, &psectsz, &psectoff);
1046		ata_ident->config = ATA_DRQ_FAST;
1047		ata_ident->cylinders = cyl;
1048		ata_ident->heads = heads;
1049		ata_ident->sectors = sech;
1050
1051		ata_ident->sectors_intr = (0x8000 | 128);
1052		ata_ident->tcg = 0;
1053
1054		ata_ident->capabilities1 = ATA_SUPPORT_DMA |
1055			ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY;
1056		ata_ident->capabilities2 = (1 << 14);
1057		ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88;
1058		if (p->mult_sectors)
1059			ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors);
1060		if (sectors <= 0x0fffffff) {
1061			ata_ident->lba_size_1 = sectors;
1062			ata_ident->lba_size_2 = (sectors >> 16);
1063		} else {
1064			ata_ident->lba_size_1 = 0xffff;
1065			ata_ident->lba_size_2 = 0x0fff;
1066		}
1067		ata_ident->mwdmamodes = 0x7;
1068		if (p->xfermode & ATA_WDMA0)
1069			ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8));
1070		ata_ident->apiomodes = 0x3;
1071		ata_ident->mwdmamin = 0x0078;
1072		ata_ident->mwdmarec = 0x0078;
1073		ata_ident->pioblind = 0x0078;
1074		ata_ident->pioiordy = 0x0078;
1075		ata_ident->support3 = 0;
1076		ata_ident->queue = 31;
1077		ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
1078			ATA_SUPPORT_NCQ);
1079		ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
1080			(p->ssts & ATA_SS_SPD_MASK) >> 3);
1081		ata_ident->version_major = 0x3f0;
1082		ata_ident->version_minor = 0x28;
1083		ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1084			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1085		ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1086			ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
1087		ata_ident->support.extension = (1 << 14);
1088		ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE |
1089			ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
1090		ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
1091			ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
1092		ata_ident->enabled.extension = (1 << 14);
1093		ata_ident->udmamodes = 0x7f;
1094		if (p->xfermode & ATA_UDMA0)
1095			ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8));
1096		ata_ident->lba_size48_1 = sectors;
1097		ata_ident->lba_size48_2 = (sectors >> 16);
1098		ata_ident->lba_size48_3 = (sectors >> 32);
1099		ata_ident->lba_size48_4 = (sectors >> 48);
1100
1101		if (candelete && !ro) {
1102			ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
1103			ata_ident->max_dsm_blocks = 1;
1104			ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM;
1105		}
1106		ata_ident->pss = ATA_PSS_VALID_VALUE;
1107		ata_ident->lsalign = 0x4000;
1108		if (psectsz > sectsz) {
1109			ata_ident->pss |= ATA_PSS_MULTLS;
1110			ata_ident->pss |= ffsl(psectsz / sectsz) - 1;
1111			ata_ident->lsalign |= (psectoff / sectsz);
1112		}
1113		if (sectsz > 512) {
1114			ata_ident->pss |= ATA_PSS_LSSABOVE512;
1115			ata_ident->lss_1 = sectsz / 2;
1116			ata_ident->lss_2 = ((sectsz / 2) >> 16);
1117		}
1118		ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1119		ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1120		ata_ident->transport_major = 0x1020;
1121		ata_ident->integrity = 0x00a5;
1122	}
1123	ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params));
1124}
1125
1126static void
1127handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1128{
1129	if (!p->atapi) {
1130		ahci_write_fis_d2h(p, slot, cfis,
1131		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1132	} else {
1133		ahci_write_fis_piosetup(p);
1134		write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params));
1135		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1136	}
1137}
1138
1139static void
1140atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1141{
1142	uint8_t buf[36];
1143	uint8_t *acmd;
1144	unsigned int len;
1145	uint32_t tfd;
1146
1147	acmd = cfis + 0x40;
1148
1149	if (acmd[1] & 1) {		/* VPD */
1150		if (acmd[2] == 0) {	/* Supported VPD pages */
1151			buf[0] = 0x05;
1152			buf[1] = 0;
1153			buf[2] = 0;
1154			buf[3] = 1;
1155			buf[4] = 0;
1156			len = 4 + buf[3];
1157		} else {
1158			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1159			p->asc = 0x24;
1160			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1161			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1162			ahci_write_fis_d2h(p, slot, cfis, tfd);
1163			return;
1164		}
1165	} else {
1166		buf[0] = 0x05;
1167		buf[1] = 0x80;
1168		buf[2] = 0x00;
1169		buf[3] = 0x21;
1170		buf[4] = 31;
1171		buf[5] = 0;
1172		buf[6] = 0;
1173		buf[7] = 0;
1174		atapi_string(buf + 8, "BHYVE", 8);
1175		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1176		atapi_string(buf + 32, "001", 4);
1177		len = sizeof(buf);
1178	}
1179
1180	if (len > acmd[4])
1181		len = acmd[4];
1182	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1183	write_prdt(p, slot, cfis, buf, len);
1184	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1185}
1186
1187static void
1188atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1189{
1190	uint8_t buf[8];
1191	uint64_t sectors;
1192
1193	sectors = blockif_size(p->bctx) / 2048;
1194	be32enc(buf, sectors - 1);
1195	be32enc(buf + 4, 2048);
1196	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1197	write_prdt(p, slot, cfis, buf, sizeof(buf));
1198	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1199}
1200
/*
 * Emulate the READ TOC packet command.
 *
 * The allocation length is read from CDB bytes 7-8 and the requested
 * format from the top two bits of CDB byte 9.  Formats 0, 1 and 2 each
 * build a fixed response on the stack, truncate it to the allocation
 * length, copy it to the guest through the PRDT and complete the command
 * successfully.  Any other format, or a format 0 request whose starting
 * track is neither 0, 1 nor 0xaa, fails with ILLEGAL REQUEST / ASC 0x24.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	unsigned int len;

	/* The packet command (CDB) sits at offset 0x40 of the command table. */
	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		/* 4-byte header plus up to two 8-byte track descriptors. */
		size_t size;
		int msf;
		uint64_t sectors;
		uint8_t start_track, buf[20], *bp;

		/* CDB byte 1 bit 1 selects MSF rather than LBA addresses. */
		msf = (acmd[1] >> 1) & 1;
		start_track = acmd[6];
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* Skip the 2-byte length; it is filled in once size is known. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		/* Descriptor for track 1, omitted when only 0xaa was asked for. */
		if (start_track <= 1) {
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* Descriptor for track 0xaa; its address is the media size. */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/*
		 * NOTE(review): dividing by 4 yields 2048-byte blocks only if
		 * the backend sector size is 512 bytes -- confirm.
		 */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		size = bp - buf;
		/* The length field does not count its own two bytes. */
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		/* Fixed 12-byte response: length 0x0a, bytes 2 and 3 set to 1. */
		uint8_t buf[12];

		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		/*
		 * 4-byte header followed by four 11-byte descriptors
		 * (points 0xa0, 0xa1, 0xa2 and 1): 48 bytes in total.
		 */
		size_t size;
		int msf;
		uint64_t sectors;
		uint8_t *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* Point 0xa0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Point 0xa1. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* Point 0xa2; its address is the media size. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/*
		 * NOTE(review): same 512-byte backend sector assumption as
		 * in format 0 -- confirm.
		 */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* Point 1, at address 0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		size = bp - buf;
		/* The length field does not count its own two bytes. */
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		/* Format 3 is not implemented. */
		uint32_t tfd;

		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
1382
1383static void
1384atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1385{
1386	uint8_t buf[16];
1387
1388	memset(buf, 0, sizeof(buf));
1389	buf[3] = 8;
1390
1391	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1392	write_prdt(p, slot, cfis, buf, sizeof(buf));
1393	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1394}
1395
1396static void
1397atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1398{
1399	struct ahci_ioreq *aior;
1400	struct ahci_cmd_hdr *hdr;
1401	struct ahci_prdt_entry *prdt;
1402	struct blockif_req *breq;
1403	uint8_t *acmd;
1404	uint64_t lba;
1405	uint32_t len;
1406	int err;
1407
1408	acmd = cfis + 0x40;
1409	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1410	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1411
1412	lba = be32dec(acmd + 2);
1413	if (acmd[0] == READ_10)
1414		len = be16dec(acmd + 7);
1415	else
1416		len = be32dec(acmd + 6);
1417	if (len == 0) {
1418		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1419		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1420	}
1421	lba *= 2048;
1422	len *= 2048;
1423
1424	/*
1425	 * Pull request off free list
1426	 */
1427	aior = STAILQ_FIRST(&p->iofhd);
1428	assert(aior != NULL);
1429	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1430	aior->cfis = cfis;
1431	aior->slot = slot;
1432	aior->len = len;
1433	aior->done = done;
1434	aior->readop = 1;
1435	breq = &aior->io_req;
1436	breq->br_offset = lba + done;
1437	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1438
1439	/* Mark this command in-flight. */
1440	p->pending |= 1 << slot;
1441
1442	/* Stuff request onto busy list. */
1443	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1444
1445	err = blockif_read(p->bctx, breq);
1446	assert(err == 0);
1447}
1448
1449static void
1450atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1451{
1452	uint8_t buf[64];
1453	uint8_t *acmd;
1454	unsigned int len;
1455
1456	acmd = cfis + 0x40;
1457	len = acmd[4];
1458	if (len > sizeof(buf))
1459		len = sizeof(buf);
1460	memset(buf, 0, len);
1461	buf[0] = 0x70 | (1 << 7);
1462	buf[2] = p->sense_key;
1463	buf[7] = 10;
1464	buf[12] = p->asc;
1465	write_prdt(p, slot, cfis, buf, len);
1466	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1467	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1468}
1469
1470static void
1471atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1472{
1473	uint8_t *acmd = cfis + 0x40;
1474	uint32_t tfd;
1475
1476	switch (acmd[4] & 3) {
1477	case 0:
1478	case 1:
1479	case 3:
1480		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1481		tfd = ATA_S_READY | ATA_S_DSC;
1482		break;
1483	case 2:
1484		/* TODO eject media */
1485		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1486		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1487		p->asc = 0x53;
1488		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1489		break;
1490	}
1491	ahci_write_fis_d2h(p, slot, cfis, tfd);
1492}
1493
1494static void
1495atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1496{
1497	uint8_t *acmd;
1498	uint32_t tfd;
1499	uint8_t pc, code;
1500	unsigned int len;
1501
1502	acmd = cfis + 0x40;
1503	len = be16dec(acmd + 7);
1504	pc = acmd[2] >> 6;
1505	code = acmd[2] & 0x3f;
1506
1507	switch (pc) {
1508	case 0:
1509		switch (code) {
1510		case MODEPAGE_RW_ERROR_RECOVERY:
1511		{
1512			uint8_t buf[16];
1513
1514			if (len > sizeof(buf))
1515				len = sizeof(buf);
1516
1517			memset(buf, 0, sizeof(buf));
1518			be16enc(buf, 16 - 2);
1519			buf[2] = 0x70;
1520			buf[8] = 0x01;
1521			buf[9] = 16 - 10;
1522			buf[11] = 0x05;
1523			write_prdt(p, slot, cfis, buf, len);
1524			tfd = ATA_S_READY | ATA_S_DSC;
1525			break;
1526		}
1527		case MODEPAGE_CD_CAPABILITIES:
1528		{
1529			uint8_t buf[30];
1530
1531			if (len > sizeof(buf))
1532				len = sizeof(buf);
1533
1534			memset(buf, 0, sizeof(buf));
1535			be16enc(buf, 30 - 2);
1536			buf[2] = 0x70;
1537			buf[8] = 0x2A;
1538			buf[9] = 30 - 10;
1539			buf[10] = 0x08;
1540			buf[12] = 0x71;
1541			be16enc(&buf[18], 2);
1542			be16enc(&buf[20], 512);
1543			write_prdt(p, slot, cfis, buf, len);
1544			tfd = ATA_S_READY | ATA_S_DSC;
1545			break;
1546		}
1547		default:
1548			goto error;
1549			break;
1550		}
1551		break;
1552	case 3:
1553		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1554		p->asc = 0x39;
1555		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1556		break;
1557error:
1558	case 1:
1559	case 2:
1560		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1561		p->asc = 0x24;
1562		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1563		break;
1564	}
1565	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1566	ahci_write_fis_d2h(p, slot, cfis, tfd);
1567}
1568
1569static void
1570atapi_get_event_status_notification(struct ahci_port *p, int slot,
1571    uint8_t *cfis)
1572{
1573	uint8_t *acmd;
1574	uint32_t tfd;
1575
1576	acmd = cfis + 0x40;
1577
1578	/* we don't support asynchronous operation */
1579	if (!(acmd[1] & 1)) {
1580		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1581		p->asc = 0x24;
1582		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1583	} else {
1584		uint8_t buf[8];
1585		unsigned int len;
1586
1587		len = be16dec(acmd + 7);
1588		if (len > sizeof(buf))
1589			len = sizeof(buf);
1590
1591		memset(buf, 0, sizeof(buf));
1592		be16enc(buf, 8 - 2);
1593		buf[2] = 0x04;
1594		buf[3] = 0x10;
1595		buf[5] = 0x02;
1596		write_prdt(p, slot, cfis, buf, len);
1597		tfd = ATA_S_READY | ATA_S_DSC;
1598	}
1599	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1600	ahci_write_fis_d2h(p, slot, cfis, tfd);
1601}
1602
1603static void
1604handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1605{
1606	uint8_t *acmd;
1607
1608	acmd = cfis + 0x40;
1609
1610#ifdef AHCI_DEBUG
1611	{
1612		int i;
1613		DPRINTF("ACMD:");
1614		for (i = 0; i < 16; i++)
1615			DPRINTF("%02x ", acmd[i]);
1616		DPRINTF("");
1617	}
1618#endif
1619
1620	switch (acmd[0]) {
1621	case TEST_UNIT_READY:
1622		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1623		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1624		break;
1625	case INQUIRY:
1626		atapi_inquiry(p, slot, cfis);
1627		break;
1628	case READ_CAPACITY:
1629		atapi_read_capacity(p, slot, cfis);
1630		break;
1631	case PREVENT_ALLOW:
1632		/* TODO */
1633		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1634		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1635		break;
1636	case READ_TOC:
1637		atapi_read_toc(p, slot, cfis);
1638		break;
1639	case REPORT_LUNS:
1640		atapi_report_luns(p, slot, cfis);
1641		break;
1642	case READ_10:
1643	case READ_12:
1644		atapi_read(p, slot, cfis, 0);
1645		break;
1646	case REQUEST_SENSE:
1647		atapi_request_sense(p, slot, cfis);
1648		break;
1649	case START_STOP_UNIT:
1650		atapi_start_stop_unit(p, slot, cfis);
1651		break;
1652	case MODE_SENSE_10:
1653		atapi_mode_sense(p, slot, cfis);
1654		break;
1655	case GET_EVENT_STATUS_NOTIFICATION:
1656		atapi_get_event_status_notification(p, slot, cfis);
1657		break;
1658	default:
1659		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1660		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1661		p->asc = 0x20;
1662		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1663				ATA_S_READY | ATA_S_ERROR);
1664		break;
1665	}
1666}
1667
/*
 * Execute one ATA command from a host-to-device register FIS.
 *
 * The port is marked busy and the command opcode in cfis[2] is dispatched.
 * Commands that finish immediately write a D2H register FIS here; the
 * read/write, flush, trim, read-log and packet commands are handed to
 * their own handlers.  Opcodes with no case are logged and aborted.
 */
static void
ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
{

	p->tfd |= ATA_S_BUSY;
	switch (cfis[2]) {
	case ATA_ATA_IDENTIFY:
		handle_identify(p, slot, cfis);
		break;
	case ATA_SETFEATURES:
	{
		/* The feature selector is in cfis[3], its argument in cfis[12]. */
		switch (cfis[3]) {
		case ATA_SF_ENAB_SATA_SF:
			switch (cfis[12]) {
			case ATA_SATA_SF_AN:
				p->tfd = ATA_S_DSC | ATA_S_READY;
				break;
			default:
				p->tfd = ATA_S_ERROR | ATA_S_READY;
				p->tfd |= (ATA_ERROR_ABORT << 8);
				break;
			}
			break;
		case ATA_SF_ENAB_WCACHE:
		case ATA_SF_DIS_WCACHE:
		case ATA_SF_ENAB_RCACHE:
		case ATA_SF_DIS_RCACHE:
			/* Accepted; no state is changed here. */
			p->tfd = ATA_S_DSC | ATA_S_READY;
			break;
		case ATA_SF_SETXFER:
		{
			/*
			 * The upper five bits of cfis[12] select the transfer
			 * class; the low three bits are recorded as the mode
			 * only for the WDMA and UDMA classes.
			 */
			switch (cfis[12] & 0xf8) {
			case ATA_PIO:
			case ATA_PIO0:
				break;
			case ATA_WDMA0:
			case ATA_UDMA0:
				p->xfermode = (cfis[12] & 0x7);
				break;
			}
			p->tfd = ATA_S_DSC | ATA_S_READY;
			break;
		}
		default:
			p->tfd = ATA_S_ERROR | ATA_S_READY;
			p->tfd |= (ATA_ERROR_ABORT << 8);
			break;
		}
		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
		break;
	}
	case ATA_SET_MULTI:
		/*
		 * Accept 0 or a power of two no larger than 128; anything
		 * else is aborted.
		 */
		if (cfis[12] != 0 &&
			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
			p->tfd = ATA_S_ERROR | ATA_S_READY;
			p->tfd |= (ATA_ERROR_ABORT << 8);
		} else {
			p->mult_sectors = cfis[12];
			p->tfd = ATA_S_DSC | ATA_S_READY;
		}
		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
		break;
	case ATA_READ:
	case ATA_WRITE:
	case ATA_READ48:
	case ATA_WRITE48:
	case ATA_READ_MUL:
	case ATA_WRITE_MUL:
	case ATA_READ_MUL48:
	case ATA_WRITE_MUL48:
	case ATA_READ_DMA:
	case ATA_WRITE_DMA:
	case ATA_READ_DMA48:
	case ATA_WRITE_DMA48:
	case ATA_READ_FPDMA_QUEUED:
	case ATA_WRITE_FPDMA_QUEUED:
		/* All data transfers share one handler, starting at offset 0. */
		ahci_handle_rw(p, slot, cfis, 0);
		break;
	case ATA_FLUSHCACHE:
	case ATA_FLUSHCACHE48:
		ahci_handle_flush(p, slot, cfis);
		break;
	case ATA_DATA_SET_MANAGEMENT:
		/*
		 * Only the exact field combination checked below (TRIM in
		 * cfis[3], cfis[12] == 1, cfis[11] and cfis[13] zero) is
		 * passed to the trim handler; anything else is aborted.
		 */
		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
		    cfis[13] == 0 && cfis[12] == 1) {
			ahci_handle_dsm_trim(p, slot, cfis, 0);
			break;
		}
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	case ATA_SEND_FPDMA_QUEUED:
		/*
		 * Queued form of the trim request: the subcommand is in the
		 * low five bits of cfis[13] and the remaining fields must
		 * match exactly, otherwise the command is aborted.
		 */
		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
		    cfis[11] == 0 && cfis[3] == 1) {
			ahci_handle_dsm_trim(p, slot, cfis, 0);
			break;
		}
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	case ATA_READ_LOG_EXT:
	case ATA_READ_LOG_DMA_EXT:
		ahci_handle_read_log(p, slot, cfis);
		break;
	case ATA_SECURITY_FREEZE_LOCK:
	case ATA_SMART_CMD:
	case ATA_NOP:
		/* Recognised but deliberately aborted, without logging. */
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	case ATA_CHECK_POWER_MODE:
		cfis[12] = 0xff;	/* always on */
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	case ATA_STANDBY_CMD:
	case ATA_STANDBY_IMMEDIATE:
	case ATA_IDLE_CMD:
	case ATA_IDLE_IMMEDIATE:
	case ATA_SLEEP:
	case ATA_READ_VERIFY:
	case ATA_READ_VERIFY48:
		/* Reported as successful without doing any work. */
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	case ATA_ATAPI_IDENTIFY:
		handle_atapi_identify(p, slot, cfis);
		break;
	case ATA_PACKET_CMD:
		/* Packet commands are only valid on ATAPI ports. */
		if (!p->atapi) {
			ahci_write_fis_d2h(p, slot, cfis,
			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		} else
			handle_packet_cmd(p, slot, cfis);
		break;
	default:
		EPRINTLN("Unsupported cmd:%02x", cfis[2]);
		ahci_write_fis_d2h(p, slot, cfis,
		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
		break;
	}
}
1809
1810static void
1811ahci_handle_slot(struct ahci_port *p, int slot)
1812{
1813	struct ahci_cmd_hdr *hdr;
1814#ifdef AHCI_DEBUG
1815	struct ahci_prdt_entry *prdt;
1816#endif
1817	struct pci_ahci_softc *sc;
1818	uint8_t *cfis;
1819#ifdef AHCI_DEBUG
1820	int cfl, i;
1821#endif
1822
1823	sc = p->pr_sc;
1824	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1825#ifdef AHCI_DEBUG
1826	cfl = (hdr->flags & 0x1f) * 4;
1827#endif
1828	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1829			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1830#ifdef AHCI_DEBUG
1831	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1832
1833	DPRINTF("cfis:");
1834	for (i = 0; i < cfl; i++) {
1835		if (i % 10 == 0)
1836			DPRINTF("");
1837		DPRINTF("%02x ", cfis[i]);
1838	}
1839	DPRINTF("");
1840
1841	for (i = 0; i < hdr->prdtl; i++) {
1842		DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba);
1843		prdt++;
1844	}
1845#endif
1846
1847	if (cfis[0] != FIS_TYPE_REGH2D) {
1848		EPRINTLN("Not a H2D FIS:%02x", cfis[0]);
1849		return;
1850	}
1851
1852	if (cfis[1] & 0x80) {
1853		ahci_handle_cmd(p, slot, cfis);
1854	} else {
1855		if (cfis[15] & (1 << 2))
1856			p->reset = 1;
1857		else if (p->reset) {
1858			p->reset = 0;
1859			ahci_port_reset(p);
1860		}
1861		p->ci &= ~(1 << slot);
1862	}
1863}
1864
1865static void
1866ahci_handle_port(struct ahci_port *p)
1867{
1868
1869	if (!(p->cmd & AHCI_P_CMD_ST))
1870		return;
1871
1872	/*
1873	 * Search for any new commands to issue ignoring those that
1874	 * are already in-flight.  Stop if device is busy or in error.
1875	 */
1876	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1877		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1878			break;
1879		if (p->waitforclear)
1880			break;
1881		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1882			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1883			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1884			ahci_handle_slot(p, p->ccs);
1885		}
1886	}
1887}
1888
1889/*
1890 * blockif callback routine - this runs in the context of the blockif
1891 * i/o thread, so the mutex needs to be acquired.
1892 */
1893static void
1894ata_ioreq_cb(struct blockif_req *br, int err)
1895{
1896	struct ahci_cmd_hdr *hdr;
1897	struct ahci_ioreq *aior;
1898	struct ahci_port *p;
1899	struct pci_ahci_softc *sc;
1900	uint32_t tfd;
1901	uint8_t *cfis;
1902	int slot, ncq, dsm;
1903
1904	DPRINTF("%s %d", __func__, err);
1905
1906	ncq = dsm = 0;
1907	aior = br->br_param;
1908	p = aior->io_pr;
1909	cfis = aior->cfis;
1910	slot = aior->slot;
1911	sc = p->pr_sc;
1912	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1913
1914	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1915	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1916	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1917		ncq = 1;
1918	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1919	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1920	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1921		dsm = 1;
1922
1923	pthread_mutex_lock(&sc->mtx);
1924
1925	/*
1926	 * Delete the blockif request from the busy list
1927	 */
1928	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1929
1930	/*
1931	 * Move the blockif request back to the free list
1932	 */
1933	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1934
1935	if (!err)
1936		hdr->prdbc = aior->done;
1937
1938	if (!err && aior->more) {
1939		if (dsm)
1940			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1941		else
1942			ahci_handle_rw(p, slot, cfis, aior->done);
1943		goto out;
1944	}
1945
1946	if (!err)
1947		tfd = ATA_S_READY | ATA_S_DSC;
1948	else
1949		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1950	if (ncq)
1951		ahci_write_fis_sdb(p, slot, cfis, tfd);
1952	else
1953		ahci_write_fis_d2h(p, slot, cfis, tfd);
1954
1955	/*
1956	 * This command is now complete.
1957	 */
1958	p->pending &= ~(1 << slot);
1959
1960	ahci_check_stopped(p);
1961	ahci_handle_port(p);
1962out:
1963	pthread_mutex_unlock(&sc->mtx);
1964	DPRINTF("%s exit", __func__);
1965}
1966
1967static void
1968atapi_ioreq_cb(struct blockif_req *br, int err)
1969{
1970	struct ahci_cmd_hdr *hdr;
1971	struct ahci_ioreq *aior;
1972	struct ahci_port *p;
1973	struct pci_ahci_softc *sc;
1974	uint8_t *cfis;
1975	uint32_t tfd;
1976	int slot;
1977
1978	DPRINTF("%s %d", __func__, err);
1979
1980	aior = br->br_param;
1981	p = aior->io_pr;
1982	cfis = aior->cfis;
1983	slot = aior->slot;
1984	sc = p->pr_sc;
1985	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1986
1987	pthread_mutex_lock(&sc->mtx);
1988
1989	/*
1990	 * Delete the blockif request from the busy list
1991	 */
1992	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1993
1994	/*
1995	 * Move the blockif request back to the free list
1996	 */
1997	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1998
1999	if (!err)
2000		hdr->prdbc = aior->done;
2001
2002	if (!err && aior->more) {
2003		atapi_read(p, slot, cfis, aior->done);
2004		goto out;
2005	}
2006
2007	if (!err) {
2008		tfd = ATA_S_READY | ATA_S_DSC;
2009	} else {
2010		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
2011		p->asc = 0x21;
2012		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
2013	}
2014	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
2015	ahci_write_fis_d2h(p, slot, cfis, tfd);
2016
2017	/*
2018	 * This command is now complete.
2019	 */
2020	p->pending &= ~(1 << slot);
2021
2022	ahci_check_stopped(p);
2023	ahci_handle_port(p);
2024out:
2025	pthread_mutex_unlock(&sc->mtx);
2026	DPRINTF("%s exit", __func__);
2027}
2028
2029static void
2030pci_ahci_ioreq_init(struct ahci_port *pr)
2031{
2032	struct ahci_ioreq *vr;
2033	int i;
2034
2035	pr->ioqsz = blockif_queuesz(pr->bctx);
2036	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
2037	STAILQ_INIT(&pr->iofhd);
2038
2039	/*
2040	 * Add all i/o request entries to the free queue
2041	 */
2042	for (i = 0; i < pr->ioqsz; i++) {
2043		vr = &pr->ioreq[i];
2044		vr->io_pr = pr;
2045		if (!pr->atapi)
2046			vr->io_req.br_callback = ata_ioreq_cb;
2047		else
2048			vr->io_req.br_callback = atapi_ioreq_cb;
2049		vr->io_req.br_param = vr;
2050		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
2051	}
2052
2053	TAILQ_INIT(&pr->iobhd);
2054}
2055
2056static void
2057pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2058{
2059	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2060	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2061	struct ahci_port *p = &sc->port[port];
2062
2063	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2064		port, offset, value);
2065
2066	switch (offset) {
2067	case AHCI_P_CLB:
2068		p->clb = value;
2069		break;
2070	case AHCI_P_CLBU:
2071		p->clbu = value;
2072		break;
2073	case AHCI_P_FB:
2074		p->fb = value;
2075		break;
2076	case AHCI_P_FBU:
2077		p->fbu = value;
2078		break;
2079	case AHCI_P_IS:
2080		p->is &= ~value;
2081		ahci_port_intr(p);
2082		break;
2083	case AHCI_P_IE:
2084		p->ie = value & 0xFDC000FF;
2085		ahci_port_intr(p);
2086		break;
2087	case AHCI_P_CMD:
2088	{
2089		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2090		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2091		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2092		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2093		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2094		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2095		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2096		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2097
2098		if (!(value & AHCI_P_CMD_ST)) {
2099			ahci_port_stop(p);
2100		} else {
2101			uint64_t clb;
2102
2103			p->cmd |= AHCI_P_CMD_CR;
2104			clb = (uint64_t)p->clbu << 32 | p->clb;
2105			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2106					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2107		}
2108
2109		if (value & AHCI_P_CMD_FRE) {
2110			uint64_t fb;
2111
2112			p->cmd |= AHCI_P_CMD_FR;
2113			fb = (uint64_t)p->fbu << 32 | p->fb;
2114			/* we don't support FBSCP, so rfis size is 256Bytes */
2115			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2116		} else {
2117			p->cmd &= ~AHCI_P_CMD_FR;
2118		}
2119
2120		if (value & AHCI_P_CMD_CLO) {
2121			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2122			p->cmd &= ~AHCI_P_CMD_CLO;
2123		}
2124
2125		if (value & AHCI_P_CMD_ICC_MASK) {
2126			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2127		}
2128
2129		ahci_handle_port(p);
2130		break;
2131	}
2132	case AHCI_P_TFD:
2133	case AHCI_P_SIG:
2134	case AHCI_P_SSTS:
2135		EPRINTLN("pci_ahci_port: read only registers 0x%"PRIx64"", offset);
2136		break;
2137	case AHCI_P_SCTL:
2138		p->sctl = value;
2139		if (!(p->cmd & AHCI_P_CMD_ST)) {
2140			if (value & ATA_SC_DET_RESET)
2141				ahci_port_reset(p);
2142		}
2143		break;
2144	case AHCI_P_SERR:
2145		p->serr &= ~value;
2146		break;
2147	case AHCI_P_SACT:
2148		p->sact |= value;
2149		break;
2150	case AHCI_P_CI:
2151		p->ci |= value;
2152		ahci_handle_port(p);
2153		break;
2154	case AHCI_P_SNTF:
2155	case AHCI_P_FBS:
2156	default:
2157		break;
2158	}
2159}
2160
2161static void
2162pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2163{
2164	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"",
2165		offset, value);
2166
2167	switch (offset) {
2168	case AHCI_CAP:
2169	case AHCI_PI:
2170	case AHCI_VS:
2171	case AHCI_CAP2:
2172		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset);
2173		break;
2174	case AHCI_GHC:
2175		if (value & AHCI_GHC_HR) {
2176			ahci_reset(sc);
2177			break;
2178		}
2179		if (value & AHCI_GHC_IE)
2180			sc->ghc |= AHCI_GHC_IE;
2181		else
2182			sc->ghc &= ~AHCI_GHC_IE;
2183		ahci_generate_intr(sc, 0xffffffff);
2184		break;
2185	case AHCI_IS:
2186		sc->is &= ~value;
2187		ahci_generate_intr(sc, value);
2188		break;
2189	default:
2190		break;
2191	}
2192}
2193
2194static void
2195pci_ahci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2196    uint64_t value)
2197{
2198	struct pci_ahci_softc *sc = pi->pi_arg;
2199
2200	assert(baridx == 5);
2201	assert((offset % 4) == 0 && size == 4);
2202
2203	pthread_mutex_lock(&sc->mtx);
2204
2205	if (offset < AHCI_OFFSET)
2206		pci_ahci_host_write(sc, offset, value);
2207	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2208		pci_ahci_port_write(sc, offset, value);
2209	else
2210		EPRINTLN("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset);
2211
2212	pthread_mutex_unlock(&sc->mtx);
2213}
2214
2215static uint64_t
2216pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2217{
2218	uint32_t value;
2219
2220	switch (offset) {
2221	case AHCI_CAP:
2222	case AHCI_GHC:
2223	case AHCI_IS:
2224	case AHCI_PI:
2225	case AHCI_VS:
2226	case AHCI_CCCC:
2227	case AHCI_CCCP:
2228	case AHCI_EM_LOC:
2229	case AHCI_EM_CTL:
2230	case AHCI_CAP2:
2231	{
2232		uint32_t *p = &sc->cap;
2233		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2234		value = *p;
2235		break;
2236	}
2237	default:
2238		value = 0;
2239		break;
2240	}
2241	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x",
2242		offset, value);
2243
2244	return (value);
2245}
2246
2247static uint64_t
2248pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2249{
2250	uint32_t value;
2251	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2252	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2253
2254	switch (offset) {
2255	case AHCI_P_CLB:
2256	case AHCI_P_CLBU:
2257	case AHCI_P_FB:
2258	case AHCI_P_FBU:
2259	case AHCI_P_IS:
2260	case AHCI_P_IE:
2261	case AHCI_P_CMD:
2262	case AHCI_P_TFD:
2263	case AHCI_P_SIG:
2264	case AHCI_P_SSTS:
2265	case AHCI_P_SCTL:
2266	case AHCI_P_SERR:
2267	case AHCI_P_SACT:
2268	case AHCI_P_CI:
2269	case AHCI_P_SNTF:
2270	case AHCI_P_FBS:
2271	{
2272		uint32_t *p= &sc->port[port].clb;
2273		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2274		value = *p;
2275		break;
2276	}
2277	default:
2278		value = 0;
2279		break;
2280	}
2281
2282	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x",
2283		port, offset, value);
2284
2285	return value;
2286}
2287
2288static uint64_t
2289pci_ahci_read(struct pci_devinst *pi, int baridx, uint64_t regoff, int size)
2290{
2291	struct pci_ahci_softc *sc = pi->pi_arg;
2292	uint64_t offset;
2293	uint32_t value;
2294
2295	assert(baridx == 5);
2296	assert(size == 1 || size == 2 || size == 4);
2297	assert((regoff & (size - 1)) == 0);
2298
2299	pthread_mutex_lock(&sc->mtx);
2300
2301	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2302	if (offset < AHCI_OFFSET)
2303		value = pci_ahci_host_read(sc, offset);
2304	else if (offset < (uint64_t)AHCI_OFFSET + sc->ports * AHCI_STEP)
2305		value = pci_ahci_port_read(sc, offset);
2306	else {
2307		value = 0;
2308		EPRINTLN("pci_ahci: unknown i/o read offset 0x%"PRIx64"",
2309		    regoff);
2310	}
2311	value >>= 8 * (regoff & 0x3);
2312
2313	pthread_mutex_unlock(&sc->mtx);
2314
2315	return (value);
2316}
2317
2318/*
2319 * Each AHCI controller has a "port" node which contains nodes for
2320 * each port named after the decimal number of the port (no leading
2321 * zeroes).  Port nodes contain a "type" ("hd" or "cd"), as well as
2322 * options for blockif.  For example:
2323 *
2324 * pci.0.1.0
2325 *          .device="ahci"
2326 *          .port
2327 *               .0
2328 *                 .type="hd"
2329 *                 .path="/path/to/image"
2330 */
2331static int
2332pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type,
2333    const char *opts)
2334{
2335	char node_name[sizeof("XX")];
2336	nvlist_t *port_nvl;
2337
2338	snprintf(node_name, sizeof(node_name), "%d", port);
2339	port_nvl = create_relative_config_node(nvl, node_name);
2340	set_config_value_node(port_nvl, "type", type);
2341	return (blockif_legacy_config(port_nvl, opts));
2342}
2343
2344static int
2345pci_ahci_legacy_config(nvlist_t *nvl, const char *opts)
2346{
2347	nvlist_t *ports_nvl;
2348	const char *type;
2349	char *next, *next2, *str, *tofree;
2350	int p, ret;
2351
2352	if (opts == NULL)
2353		return (0);
2354
2355	ports_nvl = create_relative_config_node(nvl, "port");
2356	ret = 1;
2357	tofree = str = strdup(opts);
2358	for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) {
2359		/* Identify and cut off type of present port. */
2360		if (strncmp(str, "hd:", 3) == 0) {
2361			type = "hd";
2362			str += 3;
2363		} else if (strncmp(str, "cd:", 3) == 0) {
2364			type = "cd";
2365			str += 3;
2366		} else
2367			type = NULL;
2368
2369		/* Find and cut off the next port options. */
2370		next = strstr(str, ",hd:");
2371		next2 = strstr(str, ",cd:");
2372		if (next == NULL || (next2 != NULL && next2 < next))
2373			next = next2;
2374		if (next != NULL) {
2375			next[0] = 0;
2376			next++;
2377		}
2378
2379		if (str[0] == 0)
2380			continue;
2381
2382		if (type == NULL) {
2383			EPRINTLN("Missing or invalid type for port %d: \"%s\"",
2384			    p, str);
2385			goto out;
2386		}
2387
2388		if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0)
2389			goto out;
2390	}
2391	ret = 0;
2392out:
2393	free(tofree);
2394	return (ret);
2395}
2396
2397static int
2398pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts)
2399{
2400	nvlist_t *ports_nvl;
2401
2402	ports_nvl = create_relative_config_node(nvl, "port");
2403	return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts));
2404}
2405
2406static int
2407pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts)
2408{
2409	nvlist_t *ports_nvl;
2410
2411	ports_nvl = create_relative_config_node(nvl, "port");
2412	return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts));
2413}
2414
2415static int
2416pci_ahci_init(struct pci_devinst *pi, nvlist_t *nvl)
2417{
2418	char bident[sizeof("XXX:XXX:XXX")];
2419	char node_name[sizeof("XX")];
2420	struct blockif_ctxt *bctxt;
2421	struct pci_ahci_softc *sc;
2422	int atapi, ret, slots, p;
2423	MD5_CTX mdctx;
2424	u_char digest[16];
2425	const char *path, *type, *value;
2426	nvlist_t *ports_nvl, *port_nvl;
2427
2428	ret = 0;
2429
2430#ifdef AHCI_DEBUG
2431	dbg = fopen("/tmp/log", "w+");
2432#endif
2433
2434	sc = calloc(1, sizeof(struct pci_ahci_softc));
2435	pi->pi_arg = sc;
2436	sc->asc_pi = pi;
2437	pthread_mutex_init(&sc->mtx, NULL);
2438	sc->ports = 0;
2439	sc->pi = 0;
2440	slots = 32;
2441
2442	ports_nvl = find_relative_config_node(nvl, "port");
2443	for (p = 0; ports_nvl != NULL && p < MAX_PORTS; p++) {
2444		struct ata_params *ata_ident = &sc->port[p].ata_ident;
2445		char ident[AHCI_PORT_IDENT];
2446
2447		snprintf(node_name, sizeof(node_name), "%d", p);
2448		port_nvl = find_relative_config_node(ports_nvl, node_name);
2449		if (port_nvl == NULL)
2450			continue;
2451
2452		type = get_config_value_node(port_nvl, "type");
2453		if (type == NULL)
2454			continue;
2455
2456		if (strcmp(type, "hd") == 0)
2457			atapi = 0;
2458		else
2459			atapi = 1;
2460
2461		/*
2462		 * Attempt to open the backing image. Use the PCI slot/func
2463		 * and the port number for the identifier string.
2464		 */
2465		snprintf(bident, sizeof(bident), "%u:%u:%u", pi->pi_slot,
2466		    pi->pi_func, p);
2467
2468		bctxt = blockif_open(port_nvl, bident);
2469		if (bctxt == NULL) {
2470			sc->ports = p;
2471			ret = 1;
2472			goto open_fail;
2473		}
2474
2475		ret = blockif_add_boot_device(pi, bctxt);
2476		if (ret) {
2477			sc->ports = p;
2478			goto open_fail;
2479		}
2480
2481		sc->port[p].bctx = bctxt;
2482		sc->port[p].pr_sc = sc;
2483		sc->port[p].port = p;
2484		sc->port[p].atapi = atapi;
2485
2486		/*
2487		 * Create an identifier for the backing file.
2488		 * Use parts of the md5 sum of the filename
2489		 */
2490		path = get_config_value_node(port_nvl, "path");
2491		MD5Init(&mdctx);
2492		MD5Update(&mdctx, path, strlen(path));
2493		MD5Final(digest, &mdctx);
2494		snprintf(ident, AHCI_PORT_IDENT,
2495			"BHYVE-%02X%02X-%02X%02X-%02X%02X",
2496			digest[0], digest[1], digest[2], digest[3], digest[4],
2497			digest[5]);
2498
2499		memset(ata_ident, 0, sizeof(struct ata_params));
2500		ata_string((uint8_t*)&ata_ident->serial, ident, 20);
2501		ata_string((uint8_t*)&ata_ident->revision, "001", 8);
2502		if (atapi)
2503			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40);
2504		else
2505			ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40);
2506		value = get_config_value_node(port_nvl, "nmrr");
2507		if (value != NULL)
2508			ata_ident->media_rotation_rate = atoi(value);
2509		value = get_config_value_node(port_nvl, "ser");
2510		if (value != NULL)
2511			ata_string((uint8_t*)(&ata_ident->serial), value, 20);
2512		value = get_config_value_node(port_nvl, "rev");
2513		if (value != NULL)
2514			ata_string((uint8_t*)(&ata_ident->revision), value, 8);
2515		value = get_config_value_node(port_nvl, "model");
2516		if (value != NULL)
2517			ata_string((uint8_t*)(&ata_ident->model), value, 40);
2518		ata_identify_init(&sc->port[p], atapi);
2519
2520		/*
2521		 * Allocate blockif request structures and add them
2522		 * to the free list
2523		 */
2524		pci_ahci_ioreq_init(&sc->port[p]);
2525
2526		sc->pi |= (1 << p);
2527		if (sc->port[p].ioqsz < slots)
2528			slots = sc->port[p].ioqsz;
2529	}
2530	sc->ports = p;
2531
2532	/* Intel ICH8 AHCI */
2533	--slots;
2534	if (sc->ports < DEF_PORTS)
2535		sc->ports = DEF_PORTS;
2536	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2537	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2538	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2539	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2540	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2541
2542	sc->vs = 0x10300;
2543	sc->cap2 = AHCI_CAP2_APST;
2544	ahci_reset(sc);
2545
2546	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2547	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2548	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2549	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2550	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2551	p = MIN(sc->ports, 16);
2552	p = flsl(p) - ((p & (p - 1)) ? 0 : 1);
2553	pci_emul_add_msicap(pi, 1 << p);
2554	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2555	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2556
2557	pci_lintr_request(pi);
2558
2559open_fail:
2560	if (ret) {
2561		for (p = 0; p < sc->ports; p++) {
2562			if (sc->port[p].bctx != NULL)
2563				blockif_close(sc->port[p].bctx);
2564		}
2565		free(sc);
2566	}
2567
2568	return (ret);
2569}
2570
2571#ifdef BHYVE_SNAPSHOT
/*
 * Snapshot handler for the AHCI controller.
 *
 * Passes the controller-wide registers and then, for each of the MAX_PORTS
 * port slots, the per-port state through the SNAPSHOT_* macros.  The same
 * code path serves both directions; meta->op selects the operation.
 *
 * The order of the macro invocations defines the layout of the snapshot
 * data, so it must not be changed without regard to existing snapshots.
 * NOTE(review): each SNAPSHOT_*_OR_LEAVE macro is expected to store its
 * status in 'ret' and jump to 'done' on failure — confirm against the macro
 * definitions.
 *
 * Returns the final value of 'ret', or EINVAL when the set of populated
 * ports or a port number does not match the running configuration.
 */
static int
pci_ahci_snapshot(struct vm_snapshot_meta *meta)
{
	int i, ret;
	void *bctx;
	struct pci_devinst *pi;
	struct pci_ahci_softc *sc;
	struct ahci_port *port;

	pi = meta->dev_data;
	sc = pi->pi_arg;

	/* TODO: add mtx lock/unlock */

	/* Controller-wide (HBA) state. */
	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);

	for (i = 0; i < MAX_PORTS; i++) {
		port = &sc->port[i];

		/*
		 * The backend pointer itself goes through the snapshot; below
		 * it is only ever compared against NULL, i.e. it serves as a
		 * "port is populated" marker.
		 */
		if (meta->op == VM_SNAPSHOT_SAVE)
			bctx = port->bctx;

		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);

		/* Mostly for restore; save is ensured by the lines above. */
		if (((bctx == NULL) && (port->bctx != NULL)) ||
		    ((bctx != NULL) && (port->bctx == NULL))) {
			EPRINTLN("%s: ports not matching", __func__);
			ret = EINVAL;
			goto done;
		}

		/* Nothing more to process for a port without a backend. */
		if (port->bctx == NULL)
			continue;

		if (port->port != i) {
			EPRINTLN("%s: ports not matching: "
			    "actual: %d expected: %d", __func__, port->port, i);
			ret = EINVAL;
			goto done;
		}

		/*
		 * NOTE(review): presumably translates the command-list and
		 * received-FIS pointers between host and guest addresses —
		 * confirm against SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE.
		 */
		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->cmd_lst,
			AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(pi->pi_vmctx, port->rfis, 256,
		    false, meta, ret, done);

		/* Emulated device state. */
		SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);

		/* Per-port register values. */
		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);

		/* The busy-request list is not snapshotted, so it must be empty. */
		assert(TAILQ_EMPTY(&port->iobhd));
	}

done:
	return (ret);
}
2668
2669static int
2670pci_ahci_pause(struct pci_devinst *pi)
2671{
2672	struct pci_ahci_softc *sc;
2673	struct blockif_ctxt *bctxt;
2674	int i;
2675
2676	sc = pi->pi_arg;
2677
2678	for (i = 0; i < MAX_PORTS; i++) {
2679		bctxt = sc->port[i].bctx;
2680		if (bctxt == NULL)
2681			continue;
2682
2683		blockif_pause(bctxt);
2684	}
2685
2686	return (0);
2687}
2688
2689static int
2690pci_ahci_resume(struct pci_devinst *pi)
2691{
2692	struct pci_ahci_softc *sc;
2693	struct blockif_ctxt *bctxt;
2694	int i;
2695
2696	sc = pi->pi_arg;
2697
2698	for (i = 0; i < MAX_PORTS; i++) {
2699		bctxt = sc->port[i].bctx;
2700		if (bctxt == NULL)
2701			continue;
2702
2703		blockif_resume(bctxt);
2704	}
2705
2706	return (0);
2707}
2708#endif	/* BHYVE_SNAPSHOT */
2709
/*
 * Use separate emulation names to distinguish disk ("ahci-hd") and ATAPI
 * ("ahci-cd") devices; both are aliases of the generic "ahci" emulation.
 */
2713static const struct pci_devemu pci_de_ahci = {
2714	.pe_emu =	"ahci",
2715	.pe_init =	pci_ahci_init,
2716	.pe_legacy_config = pci_ahci_legacy_config,
2717	.pe_barwrite =	pci_ahci_write,
2718	.pe_barread =	pci_ahci_read,
2719#ifdef BHYVE_SNAPSHOT
2720	.pe_snapshot =	pci_ahci_snapshot,
2721	.pe_pause =	pci_ahci_pause,
2722	.pe_resume =	pci_ahci_resume,
2723#endif
2724};
2725PCI_EMUL_SET(pci_de_ahci);
2726
2727static const struct pci_devemu pci_de_ahci_hd = {
2728	.pe_emu =	"ahci-hd",
2729	.pe_legacy_config = pci_ahci_hd_legacy_config,
2730	.pe_alias =	"ahci",
2731};
2732PCI_EMUL_SET(pci_de_ahci_hd);
2733
2734static const struct pci_devemu pci_de_ahci_cd = {
2735	.pe_emu =	"ahci-cd",
2736	.pe_legacy_config = pci_ahci_cd_legacy_config,
2737	.pe_alias =	"ahci",
2738};
2739PCI_EMUL_SET(pci_de_ahci_cd);
2740