pci_ahci.c revision 303139
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 303139 2016-07-21 11:58:47Z mav $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 303139 2016-07-21 11:58:47Z mav $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <pthread_np.h>
52#include <inttypes.h>
53#include <md5.h>
54
55#include "bhyverun.h"
56#include "pci_emul.h"
57#include "ahci.h"
58#include "block_if.h"
59
/* Upper bound on emulated ports; sizes the port[] array in the softc. */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Device signatures loaded into a port's 'sig' field by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
64
/*
 * SATA FIS type codes.  The emulation stores these in byte 0 of the FIS
 * buffers it builds.  ahci_write_fis() accepts only REGD2H, SETDEVBITS and
 * PIOSETUP and prints a warning for any other type.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
75
/*
 * SCSI opcodes
 *
 * NOTE(review): presumably compared against byte 0 of the ATAPI command
 * packet (cfis + 0x40); the dispatch code is outside this part of the file.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE
93
/*
 * SCSI mode page codes
 *
 * NOTE(review): presumably the page numbers accepted by the MODE SENSE
 * handler; that handler is not visible in this part of the file.
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A
99
/*
 * ATA commands
 *
 * NOTE(review): by their names these look like SET FEATURES subcommand
 * values (enable/disable a SATA feature) and the asynchronous-notification
 * feature selector; confirm against the SET FEATURES handler.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
106
/*
 * Debug printf
 *
 * DPRINTF writes to the 'dbg' stream and flushes it after every message
 * when AHCI_DEBUG is defined; otherwise it expands to nothing.
 * WPRINTF always prints to stdout via printf().
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
117
/*
 * Per-request bookkeeping that ties one blockif request to the AHCI
 * command slot it was issued for.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to the blockif layer */
	struct ahci_port *io_pr;	/* presumably the owning port - set outside this view */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* linkage on the port's free list (iofhd) */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* linkage on the port's busy list (iobhd) */
	uint8_t *cfis;			/* command FIS of the command being served */
	uint32_t len;			/* total bytes the command transfers */
	uint32_t done;			/* bytes already submitted/processed */
	int slot;			/* command slot number */
	int more;			/* nonzero if another pass is needed to finish */
};
129
/*
 * State of one emulated AHCI port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL means no device attached */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* command list, indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;			/* received-FIS area written by ahci_write_fis() */
	char ident[20 + 1];		/* string reported in IDENTIFY words 10-19 */
	int atapi;			/* nonzero for an ATAPI device */
	int reset;
	int waitforclear;		/* set once a FIS with ATA_S_ERROR is posted */
	int mult_sectors;		/* set to 128 on port reset */
	uint8_t xfermode;		/* set to ATA_UDMA6 on port reset */
	uint8_t err_cfis[20];		/* details of the last failed command */
	uint8_t sense_key;		/* ATAPI sense key of the last error */
	uint8_t asc;			/* ATAPI additional sense code */
	u_int ccs;			/* current command slot */
	uint32_t pending;		/* bitmask of slots with requests in flight */

	/* Port register values. */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 *
	 * iofhd holds idle request descriptors, iobhd the ones currently
	 * submitted to blockif.  NOTE(review): ioreq/ioqsz are presumably the
	 * backing array and its size; their setup is outside this view.
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
};
173
/*
 * One command list entry as laid out in guest memory; the code locates it
 * at cmd_lst + slot * AHCI_CL_SIZE.  Field order and sizes must not change.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries following the command table */
	uint32_t prdbc;		/* byte count transferred; written by write_prdt() */
	uint64_t ctba;
	uint32_t reserved[4];
};
181
/*
 * One physical region descriptor; the table starts at cfis + 0x80.
 * Field order and sizes must not change.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest physical address of the data buffer */
	uint32_t reserved;
	/* Low 22 bits of 'dbc' hold the region's byte count minus one. */
#define	DBCMASK		0x3fffff
	uint32_t dbc;
};
188
/*
 * Controller-wide state: the PCI device instance, the lock that guards
 * the port state (ahci_port_stop() asserts it is held), the HBA register
 * values and the per-port array.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;
	pthread_mutex_t	mtx;
	int ports;		/* number of entries of port[] in use */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;		/* one bit per port with an enabled interrupt pending */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;		/* nonzero while the legacy interrupt pin is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context used for guest-physical address translation. */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
208
/* Forward declaration: ahci_handle_dsm_trim() calls this before it is defined. */
static void ahci_handle_port(struct ahci_port *p);
210
/*
 * Convert a logical block address into minute/second/frame form.
 * The address is offset by 150 frames and split at 75 frames per second
 * and 60 seconds per minute; buf[0..2] receive minute, second and frame.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
218
219/*
220 * generate HBA intr depending on whether or not ports within
221 * the controller have an interrupt pending.
222 */
223static void
224ahci_generate_intr(struct pci_ahci_softc *sc)
225{
226	struct pci_devinst *pi;
227	int i;
228
229	pi = sc->asc_pi;
230
231	for (i = 0; i < sc->ports; i++) {
232		struct ahci_port *pr;
233		pr = &sc->port[i];
234		if (pr->is & pr->ie)
235			sc->is |= (1 << i);
236	}
237
238	DPRINTF("%s %x\n", __func__, sc->is);
239
240	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
241		if (pci_msi_enabled(pi)) {
242			/*
243			 * Generate an MSI interrupt on every edge
244			 */
245			pci_generate_msi(pi, 0);
246		} else if (!sc->lintr) {
247			/*
248			 * Only generate a pin-based interrupt if one wasn't
249			 * in progress
250			 */
251			sc->lintr = 1;
252			pci_lintr_assert(pi);
253		}
254	} else if (sc->lintr) {
255		/*
256		 * No interrupts: deassert pin-based signal if it had
257		 * been asserted
258		 */
259		pci_lintr_deassert(pi);
260		sc->lintr = 0;
261	}
262}
263
264static void
265ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
266{
267	int offset, len, irq;
268
269	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
270		return;
271
272	switch (ft) {
273	case FIS_TYPE_REGD2H:
274		offset = 0x40;
275		len = 20;
276		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
277		break;
278	case FIS_TYPE_SETDEVBITS:
279		offset = 0x58;
280		len = 8;
281		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
282		break;
283	case FIS_TYPE_PIOSETUP:
284		offset = 0x20;
285		len = 20;
286		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
287		break;
288	default:
289		WPRINTF("unsupported fis type %d\n", ft);
290		return;
291	}
292	if (fis[2] & ATA_S_ERROR) {
293		p->waitforclear = 1;
294		irq |= AHCI_P_IX_TFE;
295	}
296	memcpy(p->rfis + offset, fis, len);
297	if (irq) {
298		p->is |= irq;
299		ahci_generate_intr(p->pr_sc);
300	}
301}
302
303static void
304ahci_write_fis_piosetup(struct ahci_port *p)
305{
306	uint8_t fis[20];
307
308	memset(fis, 0, sizeof(fis));
309	fis[0] = FIS_TYPE_PIOSETUP;
310	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
311}
312
313static void
314ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
315{
316	uint8_t fis[8];
317	uint8_t error;
318
319	error = (tfd >> 8) & 0xff;
320	tfd &= 0x77;
321	memset(fis, 0, sizeof(fis));
322	fis[0] = FIS_TYPE_SETDEVBITS;
323	fis[1] = (1 << 6);
324	fis[2] = tfd;
325	fis[3] = error;
326	if (fis[2] & ATA_S_ERROR) {
327		p->err_cfis[0] = slot;
328		p->err_cfis[2] = tfd;
329		p->err_cfis[3] = error;
330		memcpy(&p->err_cfis[4], cfis + 4, 16);
331	} else {
332		*(uint32_t *)(fis + 4) = (1 << slot);
333		p->sact &= ~(1 << slot);
334	}
335	p->tfd &= ~0x77;
336	p->tfd |= tfd;
337	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
338}
339
340static void
341ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
342{
343	uint8_t fis[20];
344	uint8_t error;
345
346	error = (tfd >> 8) & 0xff;
347	memset(fis, 0, sizeof(fis));
348	fis[0] = FIS_TYPE_REGD2H;
349	fis[1] = (1 << 6);
350	fis[2] = tfd & 0xff;
351	fis[3] = error;
352	fis[4] = cfis[4];
353	fis[5] = cfis[5];
354	fis[6] = cfis[6];
355	fis[7] = cfis[7];
356	fis[8] = cfis[8];
357	fis[9] = cfis[9];
358	fis[10] = cfis[10];
359	fis[11] = cfis[11];
360	fis[12] = cfis[12];
361	fis[13] = cfis[13];
362	if (fis[2] & ATA_S_ERROR) {
363		p->err_cfis[0] = 0x80;
364		p->err_cfis[2] = tfd & 0xff;
365		p->err_cfis[3] = error;
366		memcpy(&p->err_cfis[4], cfis + 4, 16);
367	} else
368		p->ci &= ~(1 << slot);
369	p->tfd = tfd;
370	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
371}
372
373static void
374ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
375{
376	uint8_t fis[20];
377
378	p->tfd = ATA_S_READY | ATA_S_DSC;
379	memset(fis, 0, sizeof(fis));
380	fis[0] = FIS_TYPE_REGD2H;
381	fis[1] = 0;			/* No interrupt */
382	fis[2] = p->tfd;		/* Status */
383	fis[3] = 0;			/* No error */
384	p->ci &= ~(1 << slot);
385	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
386}
387
388static void
389ahci_write_reset_fis_d2h(struct ahci_port *p)
390{
391	uint8_t fis[20];
392
393	memset(fis, 0, sizeof(fis));
394	fis[0] = FIS_TYPE_REGD2H;
395	fis[3] = 1;
396	fis[4] = 1;
397	if (p->atapi) {
398		fis[5] = 0x14;
399		fis[6] = 0xeb;
400	}
401	fis[12] = 1;
402	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
403}
404
405static void
406ahci_check_stopped(struct ahci_port *p)
407{
408	/*
409	 * If we are no longer processing the command list and nothing
410	 * is in-flight, clear the running bit, the current command
411	 * slot, the command issue and active bits.
412	 */
413	if (!(p->cmd & AHCI_P_CMD_ST)) {
414		if (p->pending == 0) {
415			p->ccs = 0;
416			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
417			p->ci = 0;
418			p->sact = 0;
419			p->waitforclear = 0;
420		}
421	}
422}
423
424static void
425ahci_port_stop(struct ahci_port *p)
426{
427	struct ahci_ioreq *aior;
428	uint8_t *cfis;
429	int slot;
430	int ncq;
431	int error;
432
433	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
434
435	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
436		/*
437		 * Try to cancel the outstanding blockif request.
438		 */
439		error = blockif_cancel(p->bctx, &aior->io_req);
440		if (error != 0)
441			continue;
442
443		slot = aior->slot;
444		cfis = aior->cfis;
445		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
446		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
447		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
448			ncq = 1;
449
450		if (ncq)
451			p->sact &= ~(1 << slot);
452		else
453			p->ci &= ~(1 << slot);
454
455		/*
456		 * This command is now done.
457		 */
458		p->pending &= ~(1 << slot);
459
460		/*
461		 * Delete the blockif request from the busy list
462		 */
463		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
464
465		/*
466		 * Move the blockif request back to the free list
467		 */
468		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
469	}
470
471	ahci_check_stopped(p);
472}
473
474static void
475ahci_port_reset(struct ahci_port *pr)
476{
477	pr->serr = 0;
478	pr->sact = 0;
479	pr->xfermode = ATA_UDMA6;
480	pr->mult_sectors = 128;
481
482	if (!pr->bctx) {
483		pr->ssts = ATA_SS_DET_NO_DEVICE;
484		pr->sig = 0xFFFFFFFF;
485		pr->tfd = 0x7F;
486		return;
487	}
488	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
489	if (pr->sctl & ATA_SC_SPD_MASK)
490		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
491	else
492		pr->ssts |= ATA_SS_SPD_GEN3;
493	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
494	if (!pr->atapi) {
495		pr->sig = PxSIG_ATA;
496		pr->tfd |= ATA_S_READY;
497	} else
498		pr->sig = PxSIG_ATAPI;
499	ahci_write_reset_fis_d2h(pr);
500}
501
502static void
503ahci_reset(struct pci_ahci_softc *sc)
504{
505	int i;
506
507	sc->ghc = AHCI_GHC_AE;
508	sc->is = 0;
509
510	if (sc->lintr) {
511		pci_lintr_deassert(sc->asc_pi);
512		sc->lintr = 0;
513	}
514
515	for (i = 0; i < sc->ports; i++) {
516		sc->port[i].ie = 0;
517		sc->port[i].is = 0;
518		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
519		if (sc->port[i].bctx)
520			sc->port[i].cmd |= AHCI_P_CMD_CPS;
521		sc->port[i].sctl = 0;
522		ahci_port_reset(&sc->port[i]);
523	}
524}
525
/*
 * Copy 'src' into the 'len'-byte field 'dest' in ATA string format:
 * each pair of characters is byte-swapped (index i is stored at i ^ 1)
 * and the field is padded with spaces once 'src' is exhausted.  No
 * terminator is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;
	uint8_t ch;

	for (pos = 0; pos < len; pos++) {
		ch = ' ';
		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
	}
}
538
/*
 * Copy 'src' into the 'len'-byte field 'dest' in natural byte order,
 * padding with spaces once 'src' is exhausted.  No terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out, *end;

	end = dest + len;
	for (out = dest; out < end; out++) {
		if (*src != '\0')
			*out = *src++;
		else
			*out = ' ';
	}
}
551
/*
 * Build up the iovec based on the PRDT, 'done' and 'len'.
 *
 * The first aior->done bytes described by the PRDT are skipped and up to
 * (aior->len - aior->done) bytes are mapped into breq->br_iov, at most
 * BLOCKIF_IOV_MAX entries.  On return br_iovcnt and br_resid describe
 * this pass, aior->done has advanced by the bytes queued, and aior->more
 * is set when the command needs a further pass.
 */
static void
ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
    struct ahci_prdt_entry *prdt, uint16_t prdtl)
{
	struct blockif_req *breq = &aior->io_req;
	int i, j, skip, todo, left, extra;
	uint32_t dbcsz;

	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
	skip = aior->done;
	left = aior->len - aior->done;
	todo = 0;
	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
	    i++, prdt++) {
		/* The descriptor stores the region size minus one. */
		dbcsz = (prdt->dbc & DBCMASK) + 1;
		/* Skip already done part of the PRDT */
		if (dbcsz <= skip) {
			skip -= dbcsz;
			continue;
		}
		/* Partially consumed region: start 'skip' bytes into it. */
		dbcsz -= skip;
		if (dbcsz > left)
			dbcsz = left;
		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
		    prdt->dba + skip, dbcsz);
		breq->br_iov[j].iov_len = dbcsz;
		todo += dbcsz;
		left -= dbcsz;
		skip = 0;
		j++;
	}

	/* If we got limited by IOV length, round I/O down to sector size. */
	if (j == BLOCKIF_IOV_MAX) {
		extra = todo % blockif_sectsz(p->bctx);
		todo -= extra;
		assert(todo > 0);
		/* Trim 'extra' bytes off the tail, dropping whole entries as needed. */
		while (extra > 0) {
			if (breq->br_iov[j - 1].iov_len > extra) {
				breq->br_iov[j - 1].iov_len -= extra;
				break;
			}
			extra -= breq->br_iov[j - 1].iov_len;
			j--;
		}
	}

	breq->br_iovcnt = j;
	breq->br_resid = todo;
	aior->done += todo;
	/* More passes are needed only if data and PRDT entries both remain. */
	aior->more = (aior->done < aior->len && i < prdtl);
}
607
/*
 * Start (done == 0) or continue (done > 0) a read or write command.
 *
 * The LBA and sector count are decoded from the command FIS according to
 * the command class (NCQ, 48-bit, or 28-bit), converted to bytes, and a
 * request covering the next part of the transfer is submitted to blockif.
 * For NCQ commands the acceptance FIS is posted on the first pass only.
 */
static void
ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
{
	struct ahci_ioreq *aior;
	struct blockif_req *breq;
	struct ahci_prdt_entry *prdt;
	struct ahci_cmd_hdr *hdr;
	uint64_t lba;
	uint32_t len;
	int err, first, ncq, readop;

	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
	ncq = 0;
	readop = 1;
	first = (done == 0);

	/* Everything that is not one of the write opcodes is treated as a read. */
	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
		readop = 0;

	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
		/* NCQ: 48-bit LBA, sector count in bytes 11 and 3. */
		lba = ((uint64_t)cfis[10] << 40) |
			((uint64_t)cfis[9] << 32) |
			((uint64_t)cfis[8] << 24) |
			((uint64_t)cfis[6] << 16) |
			((uint64_t)cfis[5] << 8) |
			cfis[4];
		len = cfis[11] << 8 | cfis[3];
		if (!len)
			len = 65536;
		ncq = 1;
	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
		/* 48-bit: sector count in bytes 13 and 12. */
		lba = ((uint64_t)cfis[10] << 40) |
			((uint64_t)cfis[9] << 32) |
			((uint64_t)cfis[8] << 24) |
			((uint64_t)cfis[6] << 16) |
			((uint64_t)cfis[5] << 8) |
			cfis[4];
		len = cfis[13] << 8 | cfis[12];
		if (!len)
			len = 65536;
	} else {
		/* 28-bit: top LBA nibble in byte 7, 8-bit sector count. */
		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
			(cfis[5] << 8) | cfis[4];
		len = cfis[12];
		if (!len)
			len = 256;
	}
	/* Convert sector units to byte units. */
	lba *= blockif_sectsz(p->bctx);
	len *= blockif_sectsz(p->bctx);

	/* Pull request off free list */
	aior = STAILQ_FIRST(&p->iofhd);
	assert(aior != NULL);
	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);

	aior->cfis = cfis;
	aior->slot = slot;
	aior->len = len;
	aior->done = done;
	breq = &aior->io_req;
	breq->br_offset = lba + done;
	ahci_build_iov(p, aior, prdt, hdr->prdtl);

	/* Mark this command in-flight. */
	p->pending |= 1 << slot;

	/* Stuff request onto busy list. */
	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);

	if (ncq && first)
		ahci_write_fis_d2h_ncq(p, slot);

	if (readop)
		err = blockif_read(p->bctx, breq);
	else
		err = blockif_write(p->bctx, breq);
	assert(err == 0);
}
693
694static void
695ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
696{
697	struct ahci_ioreq *aior;
698	struct blockif_req *breq;
699	int err;
700
701	/*
702	 * Pull request off free list
703	 */
704	aior = STAILQ_FIRST(&p->iofhd);
705	assert(aior != NULL);
706	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
707	aior->cfis = cfis;
708	aior->slot = slot;
709	aior->len = 0;
710	aior->done = 0;
711	aior->more = 0;
712	breq = &aior->io_req;
713
714	/*
715	 * Mark this command in-flight.
716	 */
717	p->pending |= 1 << slot;
718
719	/*
720	 * Stuff request onto busy list
721	 */
722	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
723
724	err = blockif_flush(p->bctx, breq);
725	assert(err == 0);
726}
727
728static inline void
729read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
730		void *buf, int size)
731{
732	struct ahci_cmd_hdr *hdr;
733	struct ahci_prdt_entry *prdt;
734	void *to;
735	int i, len;
736
737	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
738	len = size;
739	to = buf;
740	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
741	for (i = 0; i < hdr->prdtl && len; i++) {
742		uint8_t *ptr;
743		uint32_t dbcsz;
744		int sublen;
745
746		dbcsz = (prdt->dbc & DBCMASK) + 1;
747		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
748		sublen = len < dbcsz ? len : dbcsz;
749		memcpy(to, ptr, sublen);
750		len -= sublen;
751		to += sublen;
752		prdt++;
753	}
754}
755
756static void
757ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
758{
759	struct ahci_ioreq *aior;
760	struct blockif_req *breq;
761	uint8_t *entry;
762	uint64_t elba;
763	uint32_t len, elen;
764	int err, first, ncq;
765	uint8_t buf[512];
766
767	first = (done == 0);
768	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
769		len = (uint16_t)cfis[13] << 8 | cfis[12];
770		len *= 512;
771		ncq = 0;
772	} else { /* ATA_SEND_FPDMA_QUEUED */
773		len = (uint16_t)cfis[11] << 8 | cfis[3];
774		len *= 512;
775		ncq = 1;
776	}
777	read_prdt(p, slot, cfis, buf, sizeof(buf));
778
779next:
780	entry = &buf[done];
781	elba = ((uint64_t)entry[5] << 40) |
782		((uint64_t)entry[4] << 32) |
783		((uint64_t)entry[3] << 24) |
784		((uint64_t)entry[2] << 16) |
785		((uint64_t)entry[1] << 8) |
786		entry[0];
787	elen = (uint16_t)entry[7] << 8 | entry[6];
788	done += 8;
789	if (elen == 0) {
790		if (done >= len) {
791			if (ncq) {
792				if (first)
793					ahci_write_fis_d2h_ncq(p, slot);
794				ahci_write_fis_sdb(p, slot, cfis,
795				    ATA_S_READY | ATA_S_DSC);
796			} else {
797				ahci_write_fis_d2h(p, slot, cfis,
798				    ATA_S_READY | ATA_S_DSC);
799			}
800			p->pending &= ~(1 << slot);
801			ahci_check_stopped(p);
802			if (!first)
803				ahci_handle_port(p);
804			return;
805		}
806		goto next;
807	}
808
809	/*
810	 * Pull request off free list
811	 */
812	aior = STAILQ_FIRST(&p->iofhd);
813	assert(aior != NULL);
814	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
815	aior->cfis = cfis;
816	aior->slot = slot;
817	aior->len = len;
818	aior->done = done;
819	aior->more = (len != done);
820
821	breq = &aior->io_req;
822	breq->br_offset = elba * blockif_sectsz(p->bctx);
823	breq->br_resid = elen * blockif_sectsz(p->bctx);
824
825	/*
826	 * Mark this command in-flight.
827	 */
828	p->pending |= 1 << slot;
829
830	/*
831	 * Stuff request onto busy list
832	 */
833	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
834
835	if (ncq && first)
836		ahci_write_fis_d2h_ncq(p, slot);
837
838	err = blockif_delete(p->bctx, breq);
839	assert(err == 0);
840}
841
842static inline void
843write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
844		void *buf, int size)
845{
846	struct ahci_cmd_hdr *hdr;
847	struct ahci_prdt_entry *prdt;
848	void *from;
849	int i, len;
850
851	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
852	len = size;
853	from = buf;
854	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
855	for (i = 0; i < hdr->prdtl && len; i++) {
856		uint8_t *ptr;
857		uint32_t dbcsz;
858		int sublen;
859
860		dbcsz = (prdt->dbc & DBCMASK) + 1;
861		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
862		sublen = len < dbcsz ? len : dbcsz;
863		memcpy(ptr, from, sublen);
864		len -= sublen;
865		from += sublen;
866		prdt++;
867	}
868	hdr->prdbc = size - len;
869}
870
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of
 * all 'size' bytes is zero.  The previous content of the last byte is
 * ignored.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	uint8_t total;
	int last, idx;

	last = size - 1;
	total = 0;
	for (idx = 0; idx < last; idx++)
		total += buf[idx];

	buf[last] = (uint8_t)(0x100 - total);
}
881
882static void
883ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
884{
885	struct ahci_cmd_hdr *hdr;
886	uint8_t buf[512];
887
888	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
889	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
890	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
891		ahci_write_fis_d2h(p, slot, cfis,
892		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
893		return;
894	}
895
896	memset(buf, 0, sizeof(buf));
897	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
898	ahci_checksum(buf, sizeof(buf));
899
900	if (cfis[2] == ATA_READ_LOG_EXT)
901		ahci_write_fis_piosetup(p);
902	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
903	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
904}
905
906static void
907handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
908{
909	struct ahci_cmd_hdr *hdr;
910
911	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
912	if (p->atapi || hdr->prdtl == 0) {
913		ahci_write_fis_d2h(p, slot, cfis,
914		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
915	} else {
916		uint16_t buf[256];
917		uint64_t sectors;
918		int sectsz, psectsz, psectoff, candelete, ro;
919		uint16_t cyl;
920		uint8_t sech, heads;
921
922		ro = blockif_is_ro(p->bctx);
923		candelete = blockif_candelete(p->bctx);
924		sectsz = blockif_sectsz(p->bctx);
925		sectors = blockif_size(p->bctx) / sectsz;
926		blockif_chs(p->bctx, &cyl, &heads, &sech);
927		blockif_psectsz(p->bctx, &psectsz, &psectoff);
928		memset(buf, 0, sizeof(buf));
929		buf[0] = 0x0040;
930		buf[1] = cyl;
931		buf[3] = heads;
932		buf[6] = sech;
933		ata_string((uint8_t *)(buf+10), p->ident, 20);
934		ata_string((uint8_t *)(buf+23), "001", 8);
935		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
936		buf[47] = (0x8000 | 128);
937		buf[48] = 0;
938		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
939		buf[50] = (1 << 14);
940		buf[53] = (1 << 1 | 1 << 2);
941		if (p->mult_sectors)
942			buf[59] = (0x100 | p->mult_sectors);
943		if (sectors <= 0x0fffffff) {
944			buf[60] = sectors;
945			buf[61] = (sectors >> 16);
946		} else {
947			buf[60] = 0xffff;
948			buf[61] = 0x0fff;
949		}
950		buf[63] = 0x7;
951		if (p->xfermode & ATA_WDMA0)
952			buf[63] |= (1 << ((p->xfermode & 7) + 8));
953		buf[64] = 0x3;
954		buf[65] = 120;
955		buf[66] = 120;
956		buf[67] = 120;
957		buf[68] = 120;
958		buf[69] = 0;
959		buf[75] = 31;
960		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
961			   ATA_SUPPORT_NCQ);
962		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
963			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
964		buf[80] = 0x3f0;
965		buf[81] = 0x28;
966		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
967			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
968		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
969			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
970		buf[84] = (1 << 14);
971		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
972			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
973		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
974			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
975		buf[87] = (1 << 14);
976		buf[88] = 0x7f;
977		if (p->xfermode & ATA_UDMA0)
978			buf[88] |= (1 << ((p->xfermode & 7) + 8));
979		buf[100] = sectors;
980		buf[101] = (sectors >> 16);
981		buf[102] = (sectors >> 32);
982		buf[103] = (sectors >> 48);
983		if (candelete && !ro) {
984			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
985			buf[105] = 1;
986			buf[169] = ATA_SUPPORT_DSM_TRIM;
987		}
988		buf[106] = 0x4000;
989		buf[209] = 0x4000;
990		if (psectsz > sectsz) {
991			buf[106] |= 0x2000;
992			buf[106] |= ffsl(psectsz / sectsz) - 1;
993			buf[209] |= (psectoff / sectsz);
994		}
995		if (sectsz > 512) {
996			buf[106] |= 0x1000;
997			buf[117] = sectsz / 2;
998			buf[118] = ((sectsz / 2) >> 16);
999		}
1000		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1001		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
1002		buf[222] = 0x1020;
1003		buf[255] = 0x00a5;
1004		ahci_checksum((uint8_t *)buf, sizeof(buf));
1005		ahci_write_fis_piosetup(p);
1006		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1007		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1008	}
1009}
1010
1011static void
1012handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1013{
1014	if (!p->atapi) {
1015		ahci_write_fis_d2h(p, slot, cfis,
1016		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1017	} else {
1018		uint16_t buf[256];
1019
1020		memset(buf, 0, sizeof(buf));
1021		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
1022		ata_string((uint8_t *)(buf+10), p->ident, 20);
1023		ata_string((uint8_t *)(buf+23), "001", 8);
1024		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
1025		buf[49] = (1 << 9 | 1 << 8);
1026		buf[50] = (1 << 14 | 1);
1027		buf[53] = (1 << 2 | 1 << 1);
1028		buf[62] = 0x3f;
1029		buf[63] = 7;
1030		if (p->xfermode & ATA_WDMA0)
1031			buf[63] |= (1 << ((p->xfermode & 7) + 8));
1032		buf[64] = 3;
1033		buf[65] = 120;
1034		buf[66] = 120;
1035		buf[67] = 120;
1036		buf[68] = 120;
1037		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1038		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1039		buf[78] = (1 << 5);
1040		buf[80] = 0x3f0;
1041		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1042			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1043		buf[83] = (1 << 14);
1044		buf[84] = (1 << 14);
1045		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1046			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1047		buf[87] = (1 << 14);
1048		buf[88] = 0x7f;
1049		if (p->xfermode & ATA_UDMA0)
1050			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1051		buf[222] = 0x1020;
1052		buf[255] = 0x00a5;
1053		ahci_checksum((uint8_t *)buf, sizeof(buf));
1054		ahci_write_fis_piosetup(p);
1055		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1056		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1057	}
1058}
1059
1060static void
1061atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1062{
1063	uint8_t buf[36];
1064	uint8_t *acmd;
1065	int len;
1066	uint32_t tfd;
1067
1068	acmd = cfis + 0x40;
1069
1070	if (acmd[1] & 1) {		/* VPD */
1071		if (acmd[2] == 0) {	/* Supported VPD pages */
1072			buf[0] = 0x05;
1073			buf[1] = 0;
1074			buf[2] = 0;
1075			buf[3] = 1;
1076			buf[4] = 0;
1077			len = 4 + buf[3];
1078		} else {
1079			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1080			p->asc = 0x24;
1081			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1082			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1083			ahci_write_fis_d2h(p, slot, cfis, tfd);
1084			return;
1085		}
1086	} else {
1087		buf[0] = 0x05;
1088		buf[1] = 0x80;
1089		buf[2] = 0x00;
1090		buf[3] = 0x21;
1091		buf[4] = 31;
1092		buf[5] = 0;
1093		buf[6] = 0;
1094		buf[7] = 0;
1095		atapi_string(buf + 8, "BHYVE", 8);
1096		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1097		atapi_string(buf + 32, "001", 4);
1098		len = sizeof(buf);
1099	}
1100
1101	if (len > acmd[4])
1102		len = acmd[4];
1103	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1104	write_prdt(p, slot, cfis, buf, len);
1105	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1106}
1107
1108static void
1109atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1110{
1111	uint8_t buf[8];
1112	uint64_t sectors;
1113
1114	sectors = blockif_size(p->bctx) / 2048;
1115	be32enc(buf, sectors - 1);
1116	be32enc(buf + 4, 2048);
1117	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1118	write_prdt(p, slot, cfis, buf, sizeof(buf));
1119	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1120}
1121
/*
 * Handle the ATAPI READ TOC packet.
 *
 * The allocation length is taken from bytes 7-8 of the packet and the
 * format from the top two bits of byte 9.  Formats 0, 1 and 2 build a
 * fixed single-track response that is truncated to the allocation
 * length; any other format, or a starting track other than 0, 1 or 0xaa
 * in format 0, fails with ILLEGAL REQUEST / ASC 0x24.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	int len;

	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, buf[20], *bp;

		/* Bit 1 of byte 1 selects minute/second/frame addresses. */
		msf = (acmd[1] >> 1) & 1;
		start_track = acmd[6];
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* The first two bytes (data length) are filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		/* Descriptor for track 1, omitted when only the lead-out is requested. */
		if (start_track <= 1) {
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* Descriptor 0xaa: its address is the media size. */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/* NOTE(review): the shift assumes 512-byte backend sectors per 2048-byte block - confirm. */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		uint8_t buf[12];

		/* Fixed 12-byte response: length 0xa, first and last number 1. */
		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;
		/* start_track is read but not used by this format. */
		start_track = acmd[6];
		/* The first two bytes (data length) are filled in at the end. */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* 11-byte descriptor, point 0xa0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* 11-byte descriptor, point 0xa1. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* 11-byte descriptor, point 0xa2: its address is the media size. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* 11-byte descriptor, point 1: address 0. */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		uint32_t tfd;

		/* Unsupported format. */
		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
1302
1303static void
1304atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1305{
1306	uint8_t buf[16];
1307
1308	memset(buf, 0, sizeof(buf));
1309	buf[3] = 8;
1310
1311	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1312	write_prdt(p, slot, cfis, buf, sizeof(buf));
1313	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1314}
1315
1316static void
1317atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1318{
1319	struct ahci_ioreq *aior;
1320	struct ahci_cmd_hdr *hdr;
1321	struct ahci_prdt_entry *prdt;
1322	struct blockif_req *breq;
1323	struct pci_ahci_softc *sc;
1324	uint8_t *acmd;
1325	uint64_t lba;
1326	uint32_t len;
1327	int err;
1328
1329	sc = p->pr_sc;
1330	acmd = cfis + 0x40;
1331	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1332	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1333
1334	lba = be32dec(acmd + 2);
1335	if (acmd[0] == READ_10)
1336		len = be16dec(acmd + 7);
1337	else
1338		len = be32dec(acmd + 6);
1339	if (len == 0) {
1340		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1341		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1342	}
1343	lba *= 2048;
1344	len *= 2048;
1345
1346	/*
1347	 * Pull request off free list
1348	 */
1349	aior = STAILQ_FIRST(&p->iofhd);
1350	assert(aior != NULL);
1351	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1352	aior->cfis = cfis;
1353	aior->slot = slot;
1354	aior->len = len;
1355	aior->done = done;
1356	breq = &aior->io_req;
1357	breq->br_offset = lba + done;
1358	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1359
1360	/* Mark this command in-flight. */
1361	p->pending |= 1 << slot;
1362
1363	/* Stuff request onto busy list. */
1364	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1365
1366	err = blockif_read(p->bctx, breq);
1367	assert(err == 0);
1368}
1369
1370static void
1371atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1372{
1373	uint8_t buf[64];
1374	uint8_t *acmd;
1375	int len;
1376
1377	acmd = cfis + 0x40;
1378	len = acmd[4];
1379	if (len > sizeof(buf))
1380		len = sizeof(buf);
1381	memset(buf, 0, len);
1382	buf[0] = 0x70 | (1 << 7);
1383	buf[2] = p->sense_key;
1384	buf[7] = 10;
1385	buf[12] = p->asc;
1386	write_prdt(p, slot, cfis, buf, len);
1387	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1388	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1389}
1390
1391static void
1392atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1393{
1394	uint8_t *acmd = cfis + 0x40;
1395	uint32_t tfd;
1396
1397	switch (acmd[4] & 3) {
1398	case 0:
1399	case 1:
1400	case 3:
1401		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1402		tfd = ATA_S_READY | ATA_S_DSC;
1403		break;
1404	case 2:
1405		/* TODO eject media */
1406		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1407		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1408		p->asc = 0x53;
1409		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1410		break;
1411	}
1412	ahci_write_fis_d2h(p, slot, cfis, tfd);
1413}
1414
1415static void
1416atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1417{
1418	uint8_t *acmd;
1419	uint32_t tfd;
1420	uint8_t pc, code;
1421	int len;
1422
1423	acmd = cfis + 0x40;
1424	len = be16dec(acmd + 7);
1425	pc = acmd[2] >> 6;
1426	code = acmd[2] & 0x3f;
1427
1428	switch (pc) {
1429	case 0:
1430		switch (code) {
1431		case MODEPAGE_RW_ERROR_RECOVERY:
1432		{
1433			uint8_t buf[16];
1434
1435			if (len > sizeof(buf))
1436				len = sizeof(buf);
1437
1438			memset(buf, 0, sizeof(buf));
1439			be16enc(buf, 16 - 2);
1440			buf[2] = 0x70;
1441			buf[8] = 0x01;
1442			buf[9] = 16 - 10;
1443			buf[11] = 0x05;
1444			write_prdt(p, slot, cfis, buf, len);
1445			tfd = ATA_S_READY | ATA_S_DSC;
1446			break;
1447		}
1448		case MODEPAGE_CD_CAPABILITIES:
1449		{
1450			uint8_t buf[30];
1451
1452			if (len > sizeof(buf))
1453				len = sizeof(buf);
1454
1455			memset(buf, 0, sizeof(buf));
1456			be16enc(buf, 30 - 2);
1457			buf[2] = 0x70;
1458			buf[8] = 0x2A;
1459			buf[9] = 30 - 10;
1460			buf[10] = 0x08;
1461			buf[12] = 0x71;
1462			be16enc(&buf[18], 2);
1463			be16enc(&buf[20], 512);
1464			write_prdt(p, slot, cfis, buf, len);
1465			tfd = ATA_S_READY | ATA_S_DSC;
1466			break;
1467		}
1468		default:
1469			goto error;
1470			break;
1471		}
1472		break;
1473	case 3:
1474		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1475		p->asc = 0x39;
1476		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1477		break;
1478error:
1479	case 1:
1480	case 2:
1481		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1482		p->asc = 0x24;
1483		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1484		break;
1485	}
1486	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1487	ahci_write_fis_d2h(p, slot, cfis, tfd);
1488}
1489
1490static void
1491atapi_get_event_status_notification(struct ahci_port *p, int slot,
1492    uint8_t *cfis)
1493{
1494	uint8_t *acmd;
1495	uint32_t tfd;
1496
1497	acmd = cfis + 0x40;
1498
1499	/* we don't support asynchronous operation */
1500	if (!(acmd[1] & 1)) {
1501		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1502		p->asc = 0x24;
1503		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1504	} else {
1505		uint8_t buf[8];
1506		int len;
1507
1508		len = be16dec(acmd + 7);
1509		if (len > sizeof(buf))
1510			len = sizeof(buf);
1511
1512		memset(buf, 0, sizeof(buf));
1513		be16enc(buf, 8 - 2);
1514		buf[2] = 0x04;
1515		buf[3] = 0x10;
1516		buf[5] = 0x02;
1517		write_prdt(p, slot, cfis, buf, len);
1518		tfd = ATA_S_READY | ATA_S_DSC;
1519	}
1520	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1521	ahci_write_fis_d2h(p, slot, cfis, tfd);
1522}
1523
1524static void
1525handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1526{
1527	uint8_t *acmd;
1528
1529	acmd = cfis + 0x40;
1530
1531#ifdef AHCI_DEBUG
1532	{
1533		int i;
1534		DPRINTF("ACMD:");
1535		for (i = 0; i < 16; i++)
1536			DPRINTF("%02x ", acmd[i]);
1537		DPRINTF("\n");
1538	}
1539#endif
1540
1541	switch (acmd[0]) {
1542	case TEST_UNIT_READY:
1543		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1544		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1545		break;
1546	case INQUIRY:
1547		atapi_inquiry(p, slot, cfis);
1548		break;
1549	case READ_CAPACITY:
1550		atapi_read_capacity(p, slot, cfis);
1551		break;
1552	case PREVENT_ALLOW:
1553		/* TODO */
1554		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1555		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1556		break;
1557	case READ_TOC:
1558		atapi_read_toc(p, slot, cfis);
1559		break;
1560	case REPORT_LUNS:
1561		atapi_report_luns(p, slot, cfis);
1562		break;
1563	case READ_10:
1564	case READ_12:
1565		atapi_read(p, slot, cfis, 0);
1566		break;
1567	case REQUEST_SENSE:
1568		atapi_request_sense(p, slot, cfis);
1569		break;
1570	case START_STOP_UNIT:
1571		atapi_start_stop_unit(p, slot, cfis);
1572		break;
1573	case MODE_SENSE_10:
1574		atapi_mode_sense(p, slot, cfis);
1575		break;
1576	case GET_EVENT_STATUS_NOTIFICATION:
1577		atapi_get_event_status_notification(p, slot, cfis);
1578		break;
1579	default:
1580		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1581		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1582		p->asc = 0x20;
1583		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1584				ATA_S_READY | ATA_S_ERROR);
1585		break;
1586	}
1587}
1588
1589static void
1590ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1591{
1592
1593	p->tfd |= ATA_S_BUSY;
1594	switch (cfis[2]) {
1595	case ATA_ATA_IDENTIFY:
1596		handle_identify(p, slot, cfis);
1597		break;
1598	case ATA_SETFEATURES:
1599	{
1600		switch (cfis[3]) {
1601		case ATA_SF_ENAB_SATA_SF:
1602			switch (cfis[12]) {
1603			case ATA_SATA_SF_AN:
1604				p->tfd = ATA_S_DSC | ATA_S_READY;
1605				break;
1606			default:
1607				p->tfd = ATA_S_ERROR | ATA_S_READY;
1608				p->tfd |= (ATA_ERROR_ABORT << 8);
1609				break;
1610			}
1611			break;
1612		case ATA_SF_ENAB_WCACHE:
1613		case ATA_SF_DIS_WCACHE:
1614		case ATA_SF_ENAB_RCACHE:
1615		case ATA_SF_DIS_RCACHE:
1616			p->tfd = ATA_S_DSC | ATA_S_READY;
1617			break;
1618		case ATA_SF_SETXFER:
1619		{
1620			switch (cfis[12] & 0xf8) {
1621			case ATA_PIO:
1622			case ATA_PIO0:
1623				break;
1624			case ATA_WDMA0:
1625			case ATA_UDMA0:
1626				p->xfermode = (cfis[12] & 0x7);
1627				break;
1628			}
1629			p->tfd = ATA_S_DSC | ATA_S_READY;
1630			break;
1631		}
1632		default:
1633			p->tfd = ATA_S_ERROR | ATA_S_READY;
1634			p->tfd |= (ATA_ERROR_ABORT << 8);
1635			break;
1636		}
1637		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1638		break;
1639	}
1640	case ATA_SET_MULTI:
1641		if (cfis[12] != 0 &&
1642			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1643			p->tfd = ATA_S_ERROR | ATA_S_READY;
1644			p->tfd |= (ATA_ERROR_ABORT << 8);
1645		} else {
1646			p->mult_sectors = cfis[12];
1647			p->tfd = ATA_S_DSC | ATA_S_READY;
1648		}
1649		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1650		break;
1651	case ATA_READ:
1652	case ATA_WRITE:
1653	case ATA_READ48:
1654	case ATA_WRITE48:
1655	case ATA_READ_MUL:
1656	case ATA_WRITE_MUL:
1657	case ATA_READ_MUL48:
1658	case ATA_WRITE_MUL48:
1659	case ATA_READ_DMA:
1660	case ATA_WRITE_DMA:
1661	case ATA_READ_DMA48:
1662	case ATA_WRITE_DMA48:
1663	case ATA_READ_FPDMA_QUEUED:
1664	case ATA_WRITE_FPDMA_QUEUED:
1665		ahci_handle_rw(p, slot, cfis, 0);
1666		break;
1667	case ATA_FLUSHCACHE:
1668	case ATA_FLUSHCACHE48:
1669		ahci_handle_flush(p, slot, cfis);
1670		break;
1671	case ATA_DATA_SET_MANAGEMENT:
1672		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1673		    cfis[13] == 0 && cfis[12] == 1) {
1674			ahci_handle_dsm_trim(p, slot, cfis, 0);
1675			break;
1676		}
1677		ahci_write_fis_d2h(p, slot, cfis,
1678		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1679		break;
1680	case ATA_SEND_FPDMA_QUEUED:
1681		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1682		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1683		    cfis[11] == 0 && cfis[3] == 1) {
1684			ahci_handle_dsm_trim(p, slot, cfis, 0);
1685			break;
1686		}
1687		ahci_write_fis_d2h(p, slot, cfis,
1688		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1689		break;
1690	case ATA_READ_LOG_EXT:
1691	case ATA_READ_LOG_DMA_EXT:
1692		ahci_handle_read_log(p, slot, cfis);
1693		break;
1694	case ATA_SECURITY_FREEZE_LOCK:
1695	case ATA_SMART_CMD:
1696	case ATA_NOP:
1697		ahci_write_fis_d2h(p, slot, cfis,
1698		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1699		break;
1700	case ATA_CHECK_POWER_MODE:
1701		cfis[12] = 0xff;	/* always on */
1702		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1703		break;
1704	case ATA_STANDBY_CMD:
1705	case ATA_STANDBY_IMMEDIATE:
1706	case ATA_IDLE_CMD:
1707	case ATA_IDLE_IMMEDIATE:
1708	case ATA_SLEEP:
1709	case ATA_READ_VERIFY:
1710	case ATA_READ_VERIFY48:
1711		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1712		break;
1713	case ATA_ATAPI_IDENTIFY:
1714		handle_atapi_identify(p, slot, cfis);
1715		break;
1716	case ATA_PACKET_CMD:
1717		if (!p->atapi) {
1718			ahci_write_fis_d2h(p, slot, cfis,
1719			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1720		} else
1721			handle_packet_cmd(p, slot, cfis);
1722		break;
1723	default:
1724		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1725		ahci_write_fis_d2h(p, slot, cfis,
1726		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1727		break;
1728	}
1729}
1730
1731static void
1732ahci_handle_slot(struct ahci_port *p, int slot)
1733{
1734	struct ahci_cmd_hdr *hdr;
1735#ifdef AHCI_DEBUG
1736	struct ahci_prdt_entry *prdt;
1737#endif
1738	struct pci_ahci_softc *sc;
1739	uint8_t *cfis;
1740#ifdef AHCI_DEBUG
1741	int cfl;
1742#endif
1743
1744	sc = p->pr_sc;
1745	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1746#ifdef AHCI_DEBUG
1747	cfl = (hdr->flags & 0x1f) * 4;
1748#endif
1749	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1750			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1751#ifdef AHCI_DEBUG
1752	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1753
1754	DPRINTF("\ncfis:");
1755	for (i = 0; i < cfl; i++) {
1756		if (i % 10 == 0)
1757			DPRINTF("\n");
1758		DPRINTF("%02x ", cfis[i]);
1759	}
1760	DPRINTF("\n");
1761
1762	for (i = 0; i < hdr->prdtl; i++) {
1763		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1764		prdt++;
1765	}
1766#endif
1767
1768	if (cfis[0] != FIS_TYPE_REGH2D) {
1769		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1770		return;
1771	}
1772
1773	if (cfis[1] & 0x80) {
1774		ahci_handle_cmd(p, slot, cfis);
1775	} else {
1776		if (cfis[15] & (1 << 2))
1777			p->reset = 1;
1778		else if (p->reset) {
1779			p->reset = 0;
1780			ahci_port_reset(p);
1781		}
1782		p->ci &= ~(1 << slot);
1783	}
1784}
1785
1786static void
1787ahci_handle_port(struct ahci_port *p)
1788{
1789
1790	if (!(p->cmd & AHCI_P_CMD_ST))
1791		return;
1792
1793	/*
1794	 * Search for any new commands to issue ignoring those that
1795	 * are already in-flight.  Stop if device is busy or in error.
1796	 */
1797	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1798		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1799			break;
1800		if (p->waitforclear)
1801			break;
1802		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1803			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1804			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1805			ahci_handle_slot(p, p->ccs);
1806		}
1807	}
1808}
1809
1810/*
1811 * blockif callback routine - this runs in the context of the blockif
1812 * i/o thread, so the mutex needs to be acquired.
1813 */
1814static void
1815ata_ioreq_cb(struct blockif_req *br, int err)
1816{
1817	struct ahci_cmd_hdr *hdr;
1818	struct ahci_ioreq *aior;
1819	struct ahci_port *p;
1820	struct pci_ahci_softc *sc;
1821	uint32_t tfd;
1822	uint8_t *cfis;
1823	int slot, ncq, dsm;
1824
1825	DPRINTF("%s %d\n", __func__, err);
1826
1827	ncq = dsm = 0;
1828	aior = br->br_param;
1829	p = aior->io_pr;
1830	cfis = aior->cfis;
1831	slot = aior->slot;
1832	sc = p->pr_sc;
1833	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1834
1835	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1836	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1837	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1838		ncq = 1;
1839	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1840	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1841	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1842		dsm = 1;
1843
1844	pthread_mutex_lock(&sc->mtx);
1845
1846	/*
1847	 * Delete the blockif request from the busy list
1848	 */
1849	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1850
1851	/*
1852	 * Move the blockif request back to the free list
1853	 */
1854	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1855
1856	if (!err)
1857		hdr->prdbc = aior->done;
1858
1859	if (!err && aior->more) {
1860		if (dsm)
1861			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1862		else
1863			ahci_handle_rw(p, slot, cfis, aior->done);
1864		goto out;
1865	}
1866
1867	if (!err)
1868		tfd = ATA_S_READY | ATA_S_DSC;
1869	else
1870		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1871	if (ncq)
1872		ahci_write_fis_sdb(p, slot, cfis, tfd);
1873	else
1874		ahci_write_fis_d2h(p, slot, cfis, tfd);
1875
1876	/*
1877	 * This command is now complete.
1878	 */
1879	p->pending &= ~(1 << slot);
1880
1881	ahci_check_stopped(p);
1882	ahci_handle_port(p);
1883out:
1884	pthread_mutex_unlock(&sc->mtx);
1885	DPRINTF("%s exit\n", __func__);
1886}
1887
1888static void
1889atapi_ioreq_cb(struct blockif_req *br, int err)
1890{
1891	struct ahci_cmd_hdr *hdr;
1892	struct ahci_ioreq *aior;
1893	struct ahci_port *p;
1894	struct pci_ahci_softc *sc;
1895	uint8_t *cfis;
1896	uint32_t tfd;
1897	int slot;
1898
1899	DPRINTF("%s %d\n", __func__, err);
1900
1901	aior = br->br_param;
1902	p = aior->io_pr;
1903	cfis = aior->cfis;
1904	slot = aior->slot;
1905	sc = p->pr_sc;
1906	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1907
1908	pthread_mutex_lock(&sc->mtx);
1909
1910	/*
1911	 * Delete the blockif request from the busy list
1912	 */
1913	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1914
1915	/*
1916	 * Move the blockif request back to the free list
1917	 */
1918	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1919
1920	if (!err)
1921		hdr->prdbc = aior->done;
1922
1923	if (!err && aior->more) {
1924		atapi_read(p, slot, cfis, aior->done);
1925		goto out;
1926	}
1927
1928	if (!err) {
1929		tfd = ATA_S_READY | ATA_S_DSC;
1930	} else {
1931		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1932		p->asc = 0x21;
1933		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1934	}
1935	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1936	ahci_write_fis_d2h(p, slot, cfis, tfd);
1937
1938	/*
1939	 * This command is now complete.
1940	 */
1941	p->pending &= ~(1 << slot);
1942
1943	ahci_check_stopped(p);
1944	ahci_handle_port(p);
1945out:
1946	pthread_mutex_unlock(&sc->mtx);
1947	DPRINTF("%s exit\n", __func__);
1948}
1949
1950static void
1951pci_ahci_ioreq_init(struct ahci_port *pr)
1952{
1953	struct ahci_ioreq *vr;
1954	int i;
1955
1956	pr->ioqsz = blockif_queuesz(pr->bctx);
1957	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1958	STAILQ_INIT(&pr->iofhd);
1959
1960	/*
1961	 * Add all i/o request entries to the free queue
1962	 */
1963	for (i = 0; i < pr->ioqsz; i++) {
1964		vr = &pr->ioreq[i];
1965		vr->io_pr = pr;
1966		if (!pr->atapi)
1967			vr->io_req.br_callback = ata_ioreq_cb;
1968		else
1969			vr->io_req.br_callback = atapi_ioreq_cb;
1970		vr->io_req.br_param = vr;
1971		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1972	}
1973
1974	TAILQ_INIT(&pr->iobhd);
1975}
1976
1977static void
1978pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1979{
1980	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1981	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1982	struct ahci_port *p = &sc->port[port];
1983
1984	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1985		port, offset, value);
1986
1987	switch (offset) {
1988	case AHCI_P_CLB:
1989		p->clb = value;
1990		break;
1991	case AHCI_P_CLBU:
1992		p->clbu = value;
1993		break;
1994	case AHCI_P_FB:
1995		p->fb = value;
1996		break;
1997	case AHCI_P_FBU:
1998		p->fbu = value;
1999		break;
2000	case AHCI_P_IS:
2001		p->is &= ~value;
2002		break;
2003	case AHCI_P_IE:
2004		p->ie = value & 0xFDC000FF;
2005		ahci_generate_intr(sc);
2006		break;
2007	case AHCI_P_CMD:
2008	{
2009		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2010		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2011		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2012		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
2013		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2014		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2015		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2016		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2017
2018		if (!(value & AHCI_P_CMD_ST)) {
2019			ahci_port_stop(p);
2020		} else {
2021			uint64_t clb;
2022
2023			p->cmd |= AHCI_P_CMD_CR;
2024			clb = (uint64_t)p->clbu << 32 | p->clb;
2025			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2026					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2027		}
2028
2029		if (value & AHCI_P_CMD_FRE) {
2030			uint64_t fb;
2031
2032			p->cmd |= AHCI_P_CMD_FR;
2033			fb = (uint64_t)p->fbu << 32 | p->fb;
2034			/* we don't support FBSCP, so rfis size is 256Bytes */
2035			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2036		} else {
2037			p->cmd &= ~AHCI_P_CMD_FR;
2038		}
2039
2040		if (value & AHCI_P_CMD_CLO) {
2041			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2042			p->cmd &= ~AHCI_P_CMD_CLO;
2043		}
2044
2045		if (value & AHCI_P_CMD_ICC_MASK) {
2046			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2047		}
2048
2049		ahci_handle_port(p);
2050		break;
2051	}
2052	case AHCI_P_TFD:
2053	case AHCI_P_SIG:
2054	case AHCI_P_SSTS:
2055		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
2056		break;
2057	case AHCI_P_SCTL:
2058		p->sctl = value;
2059		if (!(p->cmd & AHCI_P_CMD_ST)) {
2060			if (value & ATA_SC_DET_RESET)
2061				ahci_port_reset(p);
2062		}
2063		break;
2064	case AHCI_P_SERR:
2065		p->serr &= ~value;
2066		break;
2067	case AHCI_P_SACT:
2068		p->sact |= value;
2069		break;
2070	case AHCI_P_CI:
2071		p->ci |= value;
2072		ahci_handle_port(p);
2073		break;
2074	case AHCI_P_SNTF:
2075	case AHCI_P_FBS:
2076	default:
2077		break;
2078	}
2079}
2080
2081static void
2082pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2083{
2084	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2085		offset, value);
2086
2087	switch (offset) {
2088	case AHCI_CAP:
2089	case AHCI_PI:
2090	case AHCI_VS:
2091	case AHCI_CAP2:
2092		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2093		break;
2094	case AHCI_GHC:
2095		if (value & AHCI_GHC_HR)
2096			ahci_reset(sc);
2097		else if (value & AHCI_GHC_IE) {
2098			sc->ghc |= AHCI_GHC_IE;
2099			ahci_generate_intr(sc);
2100		}
2101		break;
2102	case AHCI_IS:
2103		sc->is &= ~value;
2104		ahci_generate_intr(sc);
2105		break;
2106	default:
2107		break;
2108	}
2109}
2110
2111static void
2112pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2113		int baridx, uint64_t offset, int size, uint64_t value)
2114{
2115	struct pci_ahci_softc *sc = pi->pi_arg;
2116
2117	assert(baridx == 5);
2118	assert((offset % 4) == 0 && size == 4);
2119
2120	pthread_mutex_lock(&sc->mtx);
2121
2122	if (offset < AHCI_OFFSET)
2123		pci_ahci_host_write(sc, offset, value);
2124	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2125		pci_ahci_port_write(sc, offset, value);
2126	else
2127		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2128
2129	pthread_mutex_unlock(&sc->mtx);
2130}
2131
2132static uint64_t
2133pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2134{
2135	uint32_t value;
2136
2137	switch (offset) {
2138	case AHCI_CAP:
2139	case AHCI_GHC:
2140	case AHCI_IS:
2141	case AHCI_PI:
2142	case AHCI_VS:
2143	case AHCI_CCCC:
2144	case AHCI_CCCP:
2145	case AHCI_EM_LOC:
2146	case AHCI_EM_CTL:
2147	case AHCI_CAP2:
2148	{
2149		uint32_t *p = &sc->cap;
2150		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2151		value = *p;
2152		break;
2153	}
2154	default:
2155		value = 0;
2156		break;
2157	}
2158	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2159		offset, value);
2160
2161	return (value);
2162}
2163
2164static uint64_t
2165pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2166{
2167	uint32_t value;
2168	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2169	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2170
2171	switch (offset) {
2172	case AHCI_P_CLB:
2173	case AHCI_P_CLBU:
2174	case AHCI_P_FB:
2175	case AHCI_P_FBU:
2176	case AHCI_P_IS:
2177	case AHCI_P_IE:
2178	case AHCI_P_CMD:
2179	case AHCI_P_TFD:
2180	case AHCI_P_SIG:
2181	case AHCI_P_SSTS:
2182	case AHCI_P_SCTL:
2183	case AHCI_P_SERR:
2184	case AHCI_P_SACT:
2185	case AHCI_P_CI:
2186	case AHCI_P_SNTF:
2187	case AHCI_P_FBS:
2188	{
2189		uint32_t *p= &sc->port[port].clb;
2190		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2191		value = *p;
2192		break;
2193	}
2194	default:
2195		value = 0;
2196		break;
2197	}
2198
2199	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2200		port, offset, value);
2201
2202	return value;
2203}
2204
2205static uint64_t
2206pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2207    uint64_t regoff, int size)
2208{
2209	struct pci_ahci_softc *sc = pi->pi_arg;
2210	uint64_t offset;
2211	uint32_t value;
2212
2213	assert(baridx == 5);
2214	assert(size == 1 || size == 2 || size == 4);
2215	assert((regoff & (size - 1)) == 0);
2216
2217	pthread_mutex_lock(&sc->mtx);
2218
2219	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2220	if (offset < AHCI_OFFSET)
2221		value = pci_ahci_host_read(sc, offset);
2222	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2223		value = pci_ahci_port_read(sc, offset);
2224	else {
2225		value = 0;
2226		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
2227		    regoff);
2228	}
2229	value >>= 8 * (regoff & 0x3);
2230
2231	pthread_mutex_unlock(&sc->mtx);
2232
2233	return (value);
2234}
2235
2236static int
2237pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2238{
2239	char bident[sizeof("XX:X:X")];
2240	struct blockif_ctxt *bctxt;
2241	struct pci_ahci_softc *sc;
2242	int ret, slots;
2243	MD5_CTX mdctx;
2244	u_char digest[16];
2245
2246	ret = 0;
2247
2248	if (opts == NULL) {
2249		fprintf(stderr, "pci_ahci: backing device required\n");
2250		return (1);
2251	}
2252
2253#ifdef AHCI_DEBUG
2254	dbg = fopen("/tmp/log", "w+");
2255#endif
2256
2257	sc = calloc(1, sizeof(struct pci_ahci_softc));
2258	pi->pi_arg = sc;
2259	sc->asc_pi = pi;
2260	sc->ports = MAX_PORTS;
2261
2262	/*
2263	 * Only use port 0 for a backing device. All other ports will be
2264	 * marked as unused
2265	 */
2266	sc->port[0].atapi = atapi;
2267
2268	/*
2269	 * Attempt to open the backing image. Use the PCI
2270	 * slot/func for the identifier string.
2271	 */
2272	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2273	bctxt = blockif_open(opts, bident);
2274	if (bctxt == NULL) {
2275		ret = 1;
2276		goto open_fail;
2277	}
2278	sc->port[0].bctx = bctxt;
2279	sc->port[0].pr_sc = sc;
2280
2281	/*
2282	 * Create an identifier for the backing file. Use parts of the
2283	 * md5 sum of the filename
2284	 */
2285	MD5Init(&mdctx);
2286	MD5Update(&mdctx, opts, strlen(opts));
2287	MD5Final(digest, &mdctx);
2288	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2289	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2290
2291	/*
2292	 * Allocate blockif request structures and add them
2293	 * to the free list
2294	 */
2295	pci_ahci_ioreq_init(&sc->port[0]);
2296
2297	pthread_mutex_init(&sc->mtx, NULL);
2298
2299	/* Intel ICH8 AHCI */
2300	slots = sc->port[0].ioqsz;
2301	if (slots > 32)
2302		slots = 32;
2303	--slots;
2304	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2305	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2306	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2307	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2308	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2309
2310	/* Only port 0 implemented */
2311	sc->pi = 1;
2312	sc->vs = 0x10300;
2313	sc->cap2 = AHCI_CAP2_APST;
2314	ahci_reset(sc);
2315
2316	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2317	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2318	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2319	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2320	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2321	pci_emul_add_msicap(pi, 1);
2322	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2323	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2324
2325	pci_lintr_request(pi);
2326
2327open_fail:
2328	if (ret) {
2329		if (sc->port[0].bctx != NULL)
2330			blockif_close(sc->port[0].bctx);
2331		free(sc);
2332	}
2333
2334	return (ret);
2335}
2336
/*
 * pe_init hook for the "ahci-hd" emulation: run the common AHCI
 * initialisation with the atapi flag clear.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2343
/*
 * pe_init hook for the "ahci-cd" emulation: run the common AHCI
 * initialisation with the atapi flag set.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2350
2351/*
2352 * Use separate emulation names to distinguish drive and atapi devices
2353 */
2354struct pci_devemu pci_de_ahci_hd = {
2355	.pe_emu =	"ahci-hd",
2356	.pe_init =	pci_ahci_hd_init,
2357	.pe_barwrite =	pci_ahci_write,
2358	.pe_barread =	pci_ahci_read
2359};
2360PCI_EMUL_SET(pci_de_ahci_hd);
2361
2362struct pci_devemu pci_de_ahci_cd = {
2363	.pe_emu =	"ahci-cd",
2364	.pe_init =	pci_ahci_atapi_init,
2365	.pe_barwrite =	pci_ahci_write,
2366	.pe_barread =	pci_ahci_read
2367};
2368PCI_EMUL_SET(pci_de_ahci_cd);
2369