pci_ahci.c revision 276349
1/*-
2 * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 276349 2014-12-28 21:27:13Z neel $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 276349 2014-12-28 21:27:13Z neel $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38#include <sys/ata.h>
39#include <sys/endian.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47#include <strings.h>
48#include <unistd.h>
49#include <assert.h>
50#include <pthread.h>
51#include <pthread_np.h>
52#include <inttypes.h>
53
54#include "bhyverun.h"
55#include "pci_emul.h"
56#include "ahci.h"
57#include "block_if.h"
58
/* Size of the per-controller port array (struct pci_ahci_softc.port[]). */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/* Port signature values stored in ahci_port.sig by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
63
/*
 * SATA Frame Information Structure (FIS) type codes.  The code is stored in
 * byte 0 of a FIS built by the ahci_write_fis_*() helpers, and is also used
 * by ahci_write_fis() to pick the destination slot in the received-FIS area.
 * Only REGD2H, SETDEVBITS and PIOSETUP are handled by ahci_write_fis().
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
74
/*
 * SCSI opcodes
 *
 * Compared against byte 0 of the ATAPI command packet (cfis + 0x40) in
 * handle_packet_cmd().  Opcodes listed here but not dispatched there are
 * rejected with an ILLEGAL REQUEST sense key.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 *
 * Compared against the low 6 bits of packet byte 2 in atapi_mode_sense().
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * SET FEATURES sub-codes: ATA_SF_ENAB_SATA_SF is matched against cfis[3] and
 * ATA_SATA_SF_AN against cfis[12] in ahci_handle_cmd().
 * NOTE(review): ATA_SF_DIS_SATA_SF is not referenced in the visible part of
 * the file - confirm whether it is used further down.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
104
/*
 * Debug printf
 *
 * DPRINTF() writes to and flushes the 'dbg' stream when AHCI_DEBUG is
 * defined and expands to nothing otherwise, so its arguments must not have
 * side effects.  WPRINTF() always prints to stdout.
 * NOTE(review): 'dbg' is not opened anywhere in the visible part of the
 * file - presumably done during device initialization; confirm.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
#else
#define DPRINTF(format, arg...)
#endif
#define WPRINTF(format, arg...) printf(format, ##arg)
115
/*
 * Per-request bookkeeping for one outstanding block backend operation.
 * An entry lives either on the port's free list (iofhd) or on its busy
 * list (iobhd).
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request handed to blockif_*() */
	struct ahci_port *io_pr;	/* presumably the owning port - TODO confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy list linkage */
	uint8_t *cfis;		/* command FIS this request was built from */
	uint32_t len;		/* total transfer length in bytes */
	uint32_t done;		/* bytes covered by requests issued so far */
	int slot;		/* command slot number */
	int prdtl;		/* PRDT entries left over beyond BLOCKIF_IOV_MAX */
};
127
/*
 * State of one emulated AHCI port.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;	/* command list, indexed by slot * AHCI_CL_SIZE */
	uint8_t *rfis;		/* received-FIS area written by ahci_write_fis() */
	int atapi;		/* non-zero when the device is ATAPI */
	int reset;		/* NOTE(review): not used in the visible code */
	int mult_sectors;	/* value set by SET MULTIPLE, 128 after reset */
	uint8_t xfermode;	/* current transfer mode, ATA_UDMA6 after reset */
	uint8_t sense_key;	/* last ATAPI sense key */
	uint8_t asc;		/* last ATAPI additional sense code */
	uint32_t pending;	/* bitmask of slots with requests in flight */

	/*
	 * Port register values.
	 * NOTE(review): the order presumably mirrors the AHCI per-port
	 * register block - confirm against the register access code.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;	/* presumably backing storage for the lists below */
	int ioqsz;			/* presumably the number of ioreq entries */
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy requests */
};
167
/*
 * One command list entry; located at cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for this command */
	uint32_t prdbc;		/* byte count actually transferred (see write_prdt) */
	uint64_t ctba;		/* presumably the command table base address */
	uint32_t reserved[4];
};
175
/*
 * One physical region descriptor; the table starts at cfis + 0x80.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
182
/*
 * Per-controller state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance used for interrupts */
	pthread_mutex_t	mtx;		/* must be held by ahci_port_stop() callers */
	int ports;			/* number of ports in use (<= MAX_PORTS) */
	/* HBA-level register values */
	uint32_t cap;
	uint32_t ghc;
	uint32_t is;			/* one bit per port with an interrupt pending */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the pin interrupt is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the controller, as needed by paddr_guest2host(). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
202
/*
 * Convert a logical block address to minute/second/frame form.
 * 150 frames are added to the address, then the result is split at
 * 75 frames per second and 60 seconds per minute into
 * buf[0] (minutes), buf[1] (seconds) and buf[2] (frames).
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames = lba + 150;
	int seconds = frames / 75;

	buf[2] = frames % 75;
	buf[1] = seconds % 60;
	buf[0] = seconds / 60;
}
210
211/*
212 * generate HBA intr depending on whether or not ports within
213 * the controller have an interrupt pending.
214 */
215static void
216ahci_generate_intr(struct pci_ahci_softc *sc)
217{
218	struct pci_devinst *pi;
219	int i;
220
221	pi = sc->asc_pi;
222
223	for (i = 0; i < sc->ports; i++) {
224		struct ahci_port *pr;
225		pr = &sc->port[i];
226		if (pr->is & pr->ie)
227			sc->is |= (1 << i);
228	}
229
230	DPRINTF("%s %x\n", __func__, sc->is);
231
232	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
233		if (pci_msi_enabled(pi)) {
234			/*
235			 * Generate an MSI interrupt on every edge
236			 */
237			pci_generate_msi(pi, 0);
238		} else if (!sc->lintr) {
239			/*
240			 * Only generate a pin-based interrupt if one wasn't
241			 * in progress
242			 */
243			sc->lintr = 1;
244			pci_lintr_assert(pi);
245		}
246	} else if (sc->lintr) {
247		/*
248		 * No interrupts: deassert pin-based signal if it had
249		 * been asserted
250		 */
251		pci_lintr_deassert(pi);
252		sc->lintr = 0;
253	}
254}
255
256static void
257ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
258{
259	int offset, len, irq;
260
261	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
262		return;
263
264	switch (ft) {
265	case FIS_TYPE_REGD2H:
266		offset = 0x40;
267		len = 20;
268		irq = AHCI_P_IX_DHR;
269		break;
270	case FIS_TYPE_SETDEVBITS:
271		offset = 0x58;
272		len = 8;
273		irq = AHCI_P_IX_SDB;
274		break;
275	case FIS_TYPE_PIOSETUP:
276		offset = 0x20;
277		len = 20;
278		irq = 0;
279		break;
280	default:
281		WPRINTF("unsupported fis type %d\n", ft);
282		return;
283	}
284	memcpy(p->rfis + offset, fis, len);
285	if (irq) {
286		p->is |= irq;
287		ahci_generate_intr(p->pr_sc);
288	}
289}
290
291static void
292ahci_write_fis_piosetup(struct ahci_port *p)
293{
294	uint8_t fis[20];
295
296	memset(fis, 0, sizeof(fis));
297	fis[0] = FIS_TYPE_PIOSETUP;
298	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
299}
300
301static void
302ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
303{
304	uint8_t fis[8];
305	uint8_t error;
306
307	error = (tfd >> 8) & 0xff;
308	memset(fis, 0, sizeof(fis));
309	fis[0] = error;
310	fis[2] = tfd & 0x77;
311	*(uint32_t *)(fis + 4) = (1 << slot);
312	if (fis[2] & ATA_S_ERROR)
313		p->is |= AHCI_P_IX_TFE;
314	p->tfd = tfd;
315	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
316}
317
318static void
319ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
320{
321	uint8_t fis[20];
322	uint8_t error;
323
324	error = (tfd >> 8) & 0xff;
325	memset(fis, 0, sizeof(fis));
326	fis[0] = FIS_TYPE_REGD2H;
327	fis[1] = (1 << 6);
328	fis[2] = tfd & 0xff;
329	fis[3] = error;
330	fis[4] = cfis[4];
331	fis[5] = cfis[5];
332	fis[6] = cfis[6];
333	fis[7] = cfis[7];
334	fis[8] = cfis[8];
335	fis[9] = cfis[9];
336	fis[10] = cfis[10];
337	fis[11] = cfis[11];
338	fis[12] = cfis[12];
339	fis[13] = cfis[13];
340	if (fis[2] & ATA_S_ERROR)
341		p->is |= AHCI_P_IX_TFE;
342	else
343		p->ci &= ~(1 << slot);
344	p->tfd = tfd;
345	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
346}
347
348static void
349ahci_write_reset_fis_d2h(struct ahci_port *p)
350{
351	uint8_t fis[20];
352
353	memset(fis, 0, sizeof(fis));
354	fis[0] = FIS_TYPE_REGD2H;
355	fis[3] = 1;
356	fis[4] = 1;
357	if (p->atapi) {
358		fis[5] = 0x14;
359		fis[6] = 0xeb;
360	}
361	fis[12] = 1;
362	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363}
364
365static void
366ahci_check_stopped(struct ahci_port *p)
367{
368	/*
369	 * If we are no longer processing the command list and nothing
370	 * is in-flight, clear the running bit.
371	 */
372	if (!(p->cmd & AHCI_P_CMD_ST)) {
373		if (p->pending == 0)
374			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
375	}
376}
377
378static void
379ahci_port_stop(struct ahci_port *p)
380{
381	struct ahci_ioreq *aior;
382	uint8_t *cfis;
383	int slot;
384	int ncq;
385	int error;
386
387	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
388
389	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
390		/*
391		 * Try to cancel the outstanding blockif request.
392		 */
393		error = blockif_cancel(p->bctx, &aior->io_req);
394		if (error != 0)
395			continue;
396
397		slot = aior->slot;
398		cfis = aior->cfis;
399		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
400		    cfis[2] == ATA_READ_FPDMA_QUEUED)
401			ncq = 1;
402
403		if (ncq)
404			p->sact &= ~(1 << slot);
405		else
406			p->ci &= ~(1 << slot);
407
408		/*
409		 * This command is now done.
410		 */
411		p->pending &= ~(1 << slot);
412
413		/*
414		 * Delete the blockif request from the busy list
415		 */
416		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
417
418		/*
419		 * Move the blockif request back to the free list
420		 */
421		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
422	}
423
424	ahci_check_stopped(p);
425}
426
427static void
428ahci_port_reset(struct ahci_port *pr)
429{
430	pr->sctl = 0;
431	pr->serr = 0;
432	pr->sact = 0;
433	pr->xfermode = ATA_UDMA6;
434	pr->mult_sectors = 128;
435
436	if (!pr->bctx) {
437		pr->ssts = ATA_SS_DET_NO_DEVICE;
438		pr->sig = 0xFFFFFFFF;
439		pr->tfd = 0x7F;
440		return;
441	}
442	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
443		ATA_SS_IPM_ACTIVE;
444	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
445	if (!pr->atapi) {
446		pr->sig = PxSIG_ATA;
447		pr->tfd |= ATA_S_READY;
448	} else
449		pr->sig = PxSIG_ATAPI;
450	ahci_write_reset_fis_d2h(pr);
451}
452
453static void
454ahci_reset(struct pci_ahci_softc *sc)
455{
456	int i;
457
458	sc->ghc = AHCI_GHC_AE;
459	sc->is = 0;
460
461	if (sc->lintr) {
462		pci_lintr_deassert(sc->asc_pi);
463		sc->lintr = 0;
464	}
465
466	for (i = 0; i < sc->ports; i++) {
467		sc->port[i].ie = 0;
468		sc->port[i].is = 0;
469		ahci_port_reset(&sc->port[i]);
470	}
471}
472
/*
 * Copy 'src' into the 'len'-byte field 'dest', padding with spaces once the
 * source string is exhausted.  Each pair of bytes is stored swapped (index
 * XOR 1).  No terminating NUL is written.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos = 0;

	while (pos < len) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
		pos++;
	}
}
485
/*
 * Copy 'src' into the 'len'-byte field 'dest' in natural byte order,
 * padding with spaces once the source string is exhausted.  No terminating
 * NUL is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *end = dest + (len > 0 ? len : 0);

	while (out < end) {
		if (*src != '\0')
			*out++ = *src++;
		else
			*out++ = ' ';
	}
}
498
/*
 * Start (or continue) a DMA read/write command against the block backend.
 *
 * p     - port the command was issued on
 * slot  - command slot number
 * cfis  - command FIS; the PRDT follows at cfis + 0x80
 * done  - byte offset into the transfer that earlier requests covered
 * seek  - index of the first PRDT entry to use for this request
 *
 * The LBA and sector count are decoded from the FIS according to the
 * command (NCQ, 48-bit, or 28-bit), a free ahci_ioreq is taken, an iovec is
 * built from at most BLOCKIF_IOV_MAX PRDT entries, and the request is
 * submitted.  PRDT entries beyond that limit are counted in aior->prdtl.
 * For NCQ commands the slot's CI bit is cleared at submission time.
 */
static void
ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
    int seek)
{
	struct ahci_ioreq *aior;
	struct blockif_req *breq;
	struct pci_ahci_softc *sc;
	struct ahci_prdt_entry *prdt;
	struct ahci_cmd_hdr *hdr;
	uint64_t lba;
	uint32_t len;
	int i, err, iovcnt, ncq, readop;

	sc = p->pr_sc;
	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
	ncq = 0;
	readop = 1;

	prdt += seek;
	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
		readop = 0;

	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
			cfis[2] == ATA_READ_FPDMA_QUEUED) {
		/* NCQ: 48-bit LBA, sector count in cfis[11]:cfis[3] */
		lba = ((uint64_t)cfis[10] << 40) |
			((uint64_t)cfis[9] << 32) |
			((uint64_t)cfis[8] << 24) |
			((uint64_t)cfis[6] << 16) |
			((uint64_t)cfis[5] << 8) |
			cfis[4];
		len = cfis[11] << 8 | cfis[3];
		if (!len)
			len = 65536;
		ncq = 1;
	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
		/* 48-bit: sector count in cfis[13]:cfis[12] */
		lba = ((uint64_t)cfis[10] << 40) |
			((uint64_t)cfis[9] << 32) |
			((uint64_t)cfis[8] << 24) |
			((uint64_t)cfis[6] << 16) |
			((uint64_t)cfis[5] << 8) |
			cfis[4];
		len = cfis[13] << 8 | cfis[12];
		if (!len)
			len = 65536;
	} else {
		/* 28-bit: top nibble of the LBA lives in cfis[7] */
		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
			(cfis[5] << 8) | cfis[4];
		len = cfis[12];
		if (!len)
			len = 256;
	}
	/* convert sector units to bytes */
	lba *= blockif_sectsz(p->bctx);
	len *= blockif_sectsz(p->bctx);

	/*
	 * Pull request off free list
	 */
	aior = STAILQ_FIRST(&p->iofhd);
	assert(aior != NULL);
	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
	aior->cfis = cfis;
	aior->slot = slot;
	aior->len = len;
	aior->done = done;
	breq = &aior->io_req;
	breq->br_offset = lba + done;
	iovcnt = hdr->prdtl - seek;
	if (iovcnt > BLOCKIF_IOV_MAX) {
		/* remember how many PRDT entries are left for later */
		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
		iovcnt = BLOCKIF_IOV_MAX;
	} else
		aior->prdtl = 0;
	breq->br_iovcnt = iovcnt;

	/*
	 * Mark this command in-flight.
	 */
	p->pending |= 1 << slot;

	/*
	 * Stuff request onto busy list
	 */
	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);

	/*
	 * Build up the iovec based on the prdt
	 */
	for (i = 0; i < iovcnt; i++) {
		uint32_t dbcsz;

		dbcsz = (prdt->dbc & DBCMASK) + 1;
		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
		    prdt->dba, dbcsz);
		breq->br_iov[i].iov_len = dbcsz;
		aior->done += dbcsz;
		prdt++;
	}
	if (readop)
		err = blockif_read(p->bctx, breq);
	else
		err = blockif_write(p->bctx, breq);
	assert(err == 0);

	/* queued commands release their CI bit as soon as they are issued */
	if (ncq)
		p->ci &= ~(1 << slot);
}
607
608static void
609ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
610{
611	struct ahci_ioreq *aior;
612	struct blockif_req *breq;
613	int err;
614
615	/*
616	 * Pull request off free list
617	 */
618	aior = STAILQ_FIRST(&p->iofhd);
619	assert(aior != NULL);
620	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
621	aior->cfis = cfis;
622	aior->slot = slot;
623	aior->len = 0;
624	aior->done = 0;
625	aior->prdtl = 0;
626	breq = &aior->io_req;
627
628	/*
629	 * Mark this command in-flight.
630	 */
631	p->pending |= 1 << slot;
632
633	/*
634	 * Stuff request onto busy list
635	 */
636	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
637
638	err = blockif_flush(p->bctx, breq);
639	assert(err == 0);
640}
641
642static inline void
643write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
644		void *buf, int size)
645{
646	struct ahci_cmd_hdr *hdr;
647	struct ahci_prdt_entry *prdt;
648	void *from;
649	int i, len;
650
651	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
652	len = size;
653	from = buf;
654	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
655	for (i = 0; i < hdr->prdtl && len; i++) {
656		uint8_t *ptr;
657		uint32_t dbcsz;
658		int sublen;
659
660		dbcsz = (prdt->dbc & DBCMASK) + 1;
661		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
662		sublen = len < dbcsz ? len : dbcsz;
663		memcpy(ptr, from, sublen);
664		len -= sublen;
665		from += sublen;
666		prdt++;
667	}
668	hdr->prdbc = size - len;
669}
670
671static void
672handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
673{
674	struct ahci_cmd_hdr *hdr;
675
676	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
677	if (p->atapi || hdr->prdtl == 0) {
678		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
679		p->is |= AHCI_P_IX_TFE;
680	} else {
681		uint16_t buf[256];
682		uint64_t sectors;
683		uint16_t cyl;
684		uint8_t sech, heads;
685
686		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
687		blockif_chs(p->bctx, &cyl, &heads, &sech);
688		memset(buf, 0, sizeof(buf));
689		buf[0] = 0x0040;
690		buf[1] = cyl;
691		buf[3] = heads;
692		buf[6] = sech;
693		/* TODO emulate different serial? */
694		ata_string((uint8_t *)(buf+10), "123456", 20);
695		ata_string((uint8_t *)(buf+23), "001", 8);
696		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
697		buf[47] = (0x8000 | 128);
698		buf[48] = 0x1;
699		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
700		buf[50] = (1 << 14);
701		buf[53] = (1 << 1 | 1 << 2);
702		if (p->mult_sectors)
703			buf[59] = (0x100 | p->mult_sectors);
704		buf[60] = sectors;
705		buf[61] = (sectors >> 16);
706		buf[63] = 0x7;
707		if (p->xfermode & ATA_WDMA0)
708			buf[63] |= (1 << ((p->xfermode & 7) + 8));
709		buf[64] = 0x3;
710		buf[65] = 100;
711		buf[66] = 100;
712		buf[67] = 100;
713		buf[68] = 100;
714		buf[75] = 31;
715		buf[76] = (1 << 8 | 1 << 2);
716		buf[80] = 0x1f0;
717		buf[81] = 0x28;
718		buf[82] = (1 << 5 | 1 << 14);
719		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
720		buf[84] = (1 << 14);
721		buf[85] = (1 << 5 | 1 << 14);
722		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
723		buf[87] = (1 << 14);
724		buf[88] = 0x7f;
725		if (p->xfermode & ATA_UDMA0)
726			buf[88] |= (1 << ((p->xfermode & 7) + 8));
727		buf[93] = (1 | 1 <<14);
728		buf[100] = sectors;
729		buf[101] = (sectors >> 16);
730		buf[102] = (sectors >> 32);
731		buf[103] = (sectors >> 48);
732		ahci_write_fis_piosetup(p);
733		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
734		p->tfd = ATA_S_DSC | ATA_S_READY;
735		p->is |= AHCI_P_IX_DP;
736		p->ci &= ~(1 << slot);
737	}
738	ahci_generate_intr(p->pr_sc);
739}
740
741static void
742handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
743{
744	if (!p->atapi) {
745		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
746		p->is |= AHCI_P_IX_TFE;
747	} else {
748		uint16_t buf[256];
749
750		memset(buf, 0, sizeof(buf));
751		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
752		/* TODO emulate different serial? */
753		ata_string((uint8_t *)(buf+10), "123456", 20);
754		ata_string((uint8_t *)(buf+23), "001", 8);
755		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
756		buf[49] = (1 << 9 | 1 << 8);
757		buf[50] = (1 << 14 | 1);
758		buf[53] = (1 << 2 | 1 << 1);
759		buf[62] = 0x3f;
760		buf[63] = 7;
761		buf[64] = 3;
762		buf[65] = 100;
763		buf[66] = 100;
764		buf[67] = 100;
765		buf[68] = 100;
766		buf[76] = (1 << 2 | 1 << 1);
767		buf[78] = (1 << 5);
768		buf[80] = (0x1f << 4);
769		buf[82] = (1 << 4);
770		buf[83] = (1 << 14);
771		buf[84] = (1 << 14);
772		buf[85] = (1 << 4);
773		buf[87] = (1 << 14);
774		buf[88] = (1 << 14 | 0x7f);
775		ahci_write_fis_piosetup(p);
776		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
777		p->tfd = ATA_S_DSC | ATA_S_READY;
778		p->is |= AHCI_P_IX_DHR;
779		p->ci &= ~(1 << slot);
780	}
781	ahci_generate_intr(p->pr_sc);
782}
783
784static void
785atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
786{
787	uint8_t buf[36];
788	uint8_t *acmd;
789	int len;
790
791	acmd = cfis + 0x40;
792
793	buf[0] = 0x05;
794	buf[1] = 0x80;
795	buf[2] = 0x00;
796	buf[3] = 0x21;
797	buf[4] = 31;
798	buf[5] = 0;
799	buf[6] = 0;
800	buf[7] = 0;
801	atapi_string(buf + 8, "BHYVE", 8);
802	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
803	atapi_string(buf + 32, "001", 4);
804
805	len = sizeof(buf);
806	if (len > acmd[4])
807		len = acmd[4];
808	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
809	write_prdt(p, slot, cfis, buf, len);
810	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
811}
812
813static void
814atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
815{
816	uint8_t buf[8];
817	uint64_t sectors;
818
819	sectors = blockif_size(p->bctx) / 2048;
820	be32enc(buf, sectors - 1);
821	be32enc(buf + 4, 2048);
822	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
823	write_prdt(p, slot, cfis, buf, sizeof(buf));
824	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
825}
826
/*
 * ATAPI READ TOC.
 *
 * The response format is taken from the top two bits of packet byte 9 and
 * the allocation length from packet bytes 7-8 (big endian).  Formats 0, 1
 * and 2 each build a fixed response describing a single track plus the
 * lead-out, truncate it to the allocation length, copy it to the guest and
 * complete the command.  Any other format, or a format 0 request whose
 * starting track is neither <= 1 nor 0xaa, fails with ILLEGAL REQUEST /
 * asc 0x24.
 */
static void
atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
{
	uint8_t *acmd;
	uint8_t format;
	int len;

	acmd = cfis + 0x40;

	len = be16dec(acmd + 7);
	format = acmd[9] >> 6;
	switch (format) {
	case 0:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, buf[20], *bp;

		/* bit 1 of packet byte 1 selects MSF instead of LBA addresses */
		msf = (acmd[1] >> 1) & 1;
		start_track = acmd[6];
		if (start_track > 1 && start_track != 0xaa) {
			uint32_t tfd;
			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
			p->asc = 0x24;
			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
			ahci_write_fis_d2h(p, slot, cfis, tfd);
			return;
		}
		/* bytes 0-1 (data length) are filled in at the end */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;
		if (start_track <= 1) {
			/* descriptor for track 1, starting at address 0 */
			*bp++ = 0;
			*bp++ = 0x14;
			*bp++ = 1;
			*bp++ = 0;
			if (msf) {
				*bp++ = 0;
				lba_to_msf(bp, 0);
				bp += 3;
			} else {
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
				*bp++ = 0;
			}
		}
		/* lead-out descriptor (track 0xaa) */
		*bp++ = 0;
		*bp++ = 0x14;
		*bp++ = 0xaa;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/* NOTE(review): the shift assumes 512-byte backend sectors - confirm */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}
		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 1:
	{
		/* fixed 12-byte response, all zero except bytes 1-3 */
		uint8_t buf[12];

		memset(buf, 0, sizeof(buf));
		buf[1] = 0xa;
		buf[2] = 0x1;
		buf[3] = 0x1;
		if (len > sizeof(buf))
			len = sizeof(buf);
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	case 2:
	{
		int msf, size;
		uint64_t sectors;
		uint8_t start_track, *bp, buf[50];

		msf = (acmd[1] >> 1) & 1;
		/* NOTE(review): start_track is read but not used in this format */
		start_track = acmd[6];
		/* bytes 0-1 (data length) are filled in at the end */
		bp = buf + 2;
		*bp++ = 1;
		*bp++ = 1;

		/* 11-byte descriptor, point 0xa0 */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* 11-byte descriptor, point 0xa1 */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;

		/* 11-byte descriptor, point 0xa2, carrying the end address */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 0xa2;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
		/* NOTE(review): the shift assumes 512-byte backend sectors - confirm */
		sectors >>= 2;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, sectors);
			bp += 3;
		} else {
			be32enc(bp, sectors);
			bp += 4;
		}

		/* 11-byte descriptor, point 1, starting at address 0 */
		*bp++ = 1;
		*bp++ = 0x14;
		*bp++ = 0;
		*bp++ = 1;
		*bp++ = 0;
		*bp++ = 0;
		*bp++ = 0;
		if (msf) {
			*bp++ = 0;
			lba_to_msf(bp, 0);
			bp += 3;
		} else {
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
			*bp++ = 0;
		}

		size = bp - buf;
		be16enc(buf, size - 2);
		if (len > size)
			len = size;
		write_prdt(p, slot, cfis, buf, len);
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
		break;
	}
	default:
	{
		/* unsupported format: fail with ILLEGAL REQUEST */
		uint32_t tfd;

		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
		p->asc = 0x24;
		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
		ahci_write_fis_d2h(p, slot, cfis, tfd);
		break;
	}
	}
}
1007
1008static void
1009atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
1010		uint32_t done, int seek)
1011{
1012	struct ahci_ioreq *aior;
1013	struct ahci_cmd_hdr *hdr;
1014	struct ahci_prdt_entry *prdt;
1015	struct blockif_req *breq;
1016	struct pci_ahci_softc *sc;
1017	uint8_t *acmd;
1018	uint64_t lba;
1019	uint32_t len;
1020	int i, err, iovcnt;
1021
1022	sc = p->pr_sc;
1023	acmd = cfis + 0x40;
1024	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1025	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1026
1027	prdt += seek;
1028	lba = be32dec(acmd + 2);
1029	if (acmd[0] == READ_10)
1030		len = be16dec(acmd + 7);
1031	else
1032		len = be32dec(acmd + 6);
1033	if (len == 0) {
1034		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1035		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1036	}
1037	lba *= 2048;
1038	len *= 2048;
1039
1040	/*
1041	 * Pull request off free list
1042	 */
1043	aior = STAILQ_FIRST(&p->iofhd);
1044	assert(aior != NULL);
1045	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1046	aior->cfis = cfis;
1047	aior->slot = slot;
1048	aior->len = len;
1049	aior->done = done;
1050	breq = &aior->io_req;
1051	breq->br_offset = lba + done;
1052	iovcnt = hdr->prdtl - seek;
1053	if (iovcnt > BLOCKIF_IOV_MAX) {
1054		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
1055		iovcnt = BLOCKIF_IOV_MAX;
1056	} else
1057		aior->prdtl = 0;
1058	breq->br_iovcnt = iovcnt;
1059
1060	/*
1061	 * Mark this command in-flight.
1062	 */
1063	p->pending |= 1 << slot;
1064
1065	/*
1066	 * Stuff request onto busy list
1067	 */
1068	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1069
1070	/*
1071	 * Build up the iovec based on the prdt
1072	 */
1073	for (i = 0; i < iovcnt; i++) {
1074		uint32_t dbcsz;
1075
1076		dbcsz = (prdt->dbc & DBCMASK) + 1;
1077		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
1078		    prdt->dba, dbcsz);
1079		breq->br_iov[i].iov_len = dbcsz;
1080		aior->done += dbcsz;
1081		prdt++;
1082	}
1083	err = blockif_read(p->bctx, breq);
1084	assert(err == 0);
1085}
1086
1087static void
1088atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1089{
1090	uint8_t buf[64];
1091	uint8_t *acmd;
1092	int len;
1093
1094	acmd = cfis + 0x40;
1095	len = acmd[4];
1096	if (len > sizeof(buf))
1097		len = sizeof(buf);
1098	memset(buf, 0, len);
1099	buf[0] = 0x70 | (1 << 7);
1100	buf[2] = p->sense_key;
1101	buf[7] = 10;
1102	buf[12] = p->asc;
1103	write_prdt(p, slot, cfis, buf, len);
1104	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1105	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1106}
1107
1108static void
1109atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1110{
1111	uint8_t *acmd = cfis + 0x40;
1112	uint32_t tfd;
1113
1114	switch (acmd[4] & 3) {
1115	case 0:
1116	case 1:
1117	case 3:
1118		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1119		tfd = ATA_S_READY | ATA_S_DSC;
1120		break;
1121	case 2:
1122		/* TODO eject media */
1123		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1124		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1125		p->asc = 0x53;
1126		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1127		break;
1128	}
1129	ahci_write_fis_d2h(p, slot, cfis, tfd);
1130}
1131
1132static void
1133atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1134{
1135	uint8_t *acmd;
1136	uint32_t tfd;
1137	uint8_t pc, code;
1138	int len;
1139
1140	acmd = cfis + 0x40;
1141	len = be16dec(acmd + 7);
1142	pc = acmd[2] >> 6;
1143	code = acmd[2] & 0x3f;
1144
1145	switch (pc) {
1146	case 0:
1147		switch (code) {
1148		case MODEPAGE_RW_ERROR_RECOVERY:
1149		{
1150			uint8_t buf[16];
1151
1152			if (len > sizeof(buf))
1153				len = sizeof(buf);
1154
1155			memset(buf, 0, sizeof(buf));
1156			be16enc(buf, 16 - 2);
1157			buf[2] = 0x70;
1158			buf[8] = 0x01;
1159			buf[9] = 16 - 10;
1160			buf[11] = 0x05;
1161			write_prdt(p, slot, cfis, buf, len);
1162			tfd = ATA_S_READY | ATA_S_DSC;
1163			break;
1164		}
1165		case MODEPAGE_CD_CAPABILITIES:
1166		{
1167			uint8_t buf[30];
1168
1169			if (len > sizeof(buf))
1170				len = sizeof(buf);
1171
1172			memset(buf, 0, sizeof(buf));
1173			be16enc(buf, 30 - 2);
1174			buf[2] = 0x70;
1175			buf[8] = 0x2A;
1176			buf[9] = 30 - 10;
1177			buf[10] = 0x08;
1178			buf[12] = 0x71;
1179			be16enc(&buf[18], 2);
1180			be16enc(&buf[20], 512);
1181			write_prdt(p, slot, cfis, buf, len);
1182			tfd = ATA_S_READY | ATA_S_DSC;
1183			break;
1184		}
1185		default:
1186			goto error;
1187			break;
1188		}
1189		break;
1190	case 3:
1191		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1192		p->asc = 0x39;
1193		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1194		break;
1195error:
1196	case 1:
1197	case 2:
1198		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1199		p->asc = 0x24;
1200		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1201		break;
1202	}
1203	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1204	ahci_write_fis_d2h(p, slot, cfis, tfd);
1205}
1206
1207static void
1208atapi_get_event_status_notification(struct ahci_port *p, int slot,
1209    uint8_t *cfis)
1210{
1211	uint8_t *acmd;
1212	uint32_t tfd;
1213
1214	acmd = cfis + 0x40;
1215
1216	/* we don't support asynchronous operation */
1217	if (!(acmd[1] & 1)) {
1218		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1219		p->asc = 0x24;
1220		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1221	} else {
1222		uint8_t buf[8];
1223		int len;
1224
1225		len = be16dec(acmd + 7);
1226		if (len > sizeof(buf))
1227			len = sizeof(buf);
1228
1229		memset(buf, 0, sizeof(buf));
1230		be16enc(buf, 8 - 2);
1231		buf[2] = 0x04;
1232		buf[3] = 0x10;
1233		buf[5] = 0x02;
1234		write_prdt(p, slot, cfis, buf, len);
1235		tfd = ATA_S_READY | ATA_S_DSC;
1236	}
1237	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1238	ahci_write_fis_d2h(p, slot, cfis, tfd);
1239}
1240
1241static void
1242handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1243{
1244	uint8_t *acmd;
1245
1246	acmd = cfis + 0x40;
1247
1248#ifdef AHCI_DEBUG
1249	{
1250		int i;
1251		DPRINTF("ACMD:");
1252		for (i = 0; i < 16; i++)
1253			DPRINTF("%02x ", acmd[i]);
1254		DPRINTF("\n");
1255	}
1256#endif
1257
1258	switch (acmd[0]) {
1259	case TEST_UNIT_READY:
1260		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1261		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1262		break;
1263	case INQUIRY:
1264		atapi_inquiry(p, slot, cfis);
1265		break;
1266	case READ_CAPACITY:
1267		atapi_read_capacity(p, slot, cfis);
1268		break;
1269	case PREVENT_ALLOW:
1270		/* TODO */
1271		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1272		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1273		break;
1274	case READ_TOC:
1275		atapi_read_toc(p, slot, cfis);
1276		break;
1277	case READ_10:
1278	case READ_12:
1279		atapi_read(p, slot, cfis, 0, 0);
1280		break;
1281	case REQUEST_SENSE:
1282		atapi_request_sense(p, slot, cfis);
1283		break;
1284	case START_STOP_UNIT:
1285		atapi_start_stop_unit(p, slot, cfis);
1286		break;
1287	case MODE_SENSE_10:
1288		atapi_mode_sense(p, slot, cfis);
1289		break;
1290	case GET_EVENT_STATUS_NOTIFICATION:
1291		atapi_get_event_status_notification(p, slot, cfis);
1292		break;
1293	default:
1294		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1295		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1296		p->asc = 0x20;
1297		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1298				ATA_S_READY | ATA_S_ERROR);
1299		break;
1300	}
1301}
1302
1303static void
1304ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1305{
1306
1307	switch (cfis[2]) {
1308	case ATA_ATA_IDENTIFY:
1309		handle_identify(p, slot, cfis);
1310		break;
1311	case ATA_SETFEATURES:
1312	{
1313		switch (cfis[3]) {
1314		case ATA_SF_ENAB_SATA_SF:
1315			switch (cfis[12]) {
1316			case ATA_SATA_SF_AN:
1317				p->tfd = ATA_S_DSC | ATA_S_READY;
1318				break;
1319			default:
1320				p->tfd = ATA_S_ERROR | ATA_S_READY;
1321				p->tfd |= (ATA_ERROR_ABORT << 8);
1322				break;
1323			}
1324			break;
1325		case ATA_SF_ENAB_WCACHE:
1326		case ATA_SF_DIS_WCACHE:
1327		case ATA_SF_ENAB_RCACHE:
1328		case ATA_SF_DIS_RCACHE:
1329			p->tfd = ATA_S_DSC | ATA_S_READY;
1330			break;
1331		case ATA_SF_SETXFER:
1332		{
1333			switch (cfis[12] & 0xf8) {
1334			case ATA_PIO:
1335			case ATA_PIO0:
1336				break;
1337			case ATA_WDMA0:
1338			case ATA_UDMA0:
1339				p->xfermode = (cfis[12] & 0x7);
1340				break;
1341			}
1342			p->tfd = ATA_S_DSC | ATA_S_READY;
1343			break;
1344		}
1345		default:
1346			p->tfd = ATA_S_ERROR | ATA_S_READY;
1347			p->tfd |= (ATA_ERROR_ABORT << 8);
1348			break;
1349		}
1350		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1351		break;
1352	}
1353	case ATA_SET_MULTI:
1354		if (cfis[12] != 0 &&
1355			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1356			p->tfd = ATA_S_ERROR | ATA_S_READY;
1357			p->tfd |= (ATA_ERROR_ABORT << 8);
1358		} else {
1359			p->mult_sectors = cfis[12];
1360			p->tfd = ATA_S_DSC | ATA_S_READY;
1361		}
1362		p->is |= AHCI_P_IX_DP;
1363		p->ci &= ~(1 << slot);
1364		ahci_generate_intr(p->pr_sc);
1365		break;
1366	case ATA_READ_DMA:
1367	case ATA_WRITE_DMA:
1368	case ATA_READ_DMA48:
1369	case ATA_WRITE_DMA48:
1370	case ATA_READ_FPDMA_QUEUED:
1371	case ATA_WRITE_FPDMA_QUEUED:
1372		ahci_handle_dma(p, slot, cfis, 0, 0);
1373		break;
1374	case ATA_FLUSHCACHE:
1375	case ATA_FLUSHCACHE48:
1376		ahci_handle_flush(p, slot, cfis);
1377		break;
1378	case ATA_STANDBY_CMD:
1379		break;
1380	case ATA_NOP:
1381	case ATA_STANDBY_IMMEDIATE:
1382	case ATA_IDLE_IMMEDIATE:
1383	case ATA_SLEEP:
1384		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1385		break;
1386	case ATA_ATAPI_IDENTIFY:
1387		handle_atapi_identify(p, slot, cfis);
1388		break;
1389	case ATA_PACKET_CMD:
1390		if (!p->atapi) {
1391			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1392			p->is |= AHCI_P_IX_TFE;
1393			ahci_generate_intr(p->pr_sc);
1394		} else
1395			handle_packet_cmd(p, slot, cfis);
1396		break;
1397	default:
1398		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1399		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1400		p->is |= AHCI_P_IX_TFE;
1401		ahci_generate_intr(p->pr_sc);
1402		break;
1403	}
1404}
1405
1406static void
1407ahci_handle_slot(struct ahci_port *p, int slot)
1408{
1409	struct ahci_cmd_hdr *hdr;
1410	struct ahci_prdt_entry *prdt;
1411	struct pci_ahci_softc *sc;
1412	uint8_t *cfis;
1413	int cfl;
1414
1415	sc = p->pr_sc;
1416	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1417	cfl = (hdr->flags & 0x1f) * 4;
1418	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1419			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1420	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1421
1422#ifdef AHCI_DEBUG
1423	DPRINTF("\ncfis:");
1424	for (i = 0; i < cfl; i++) {
1425		if (i % 10 == 0)
1426			DPRINTF("\n");
1427		DPRINTF("%02x ", cfis[i]);
1428	}
1429	DPRINTF("\n");
1430
1431	for (i = 0; i < hdr->prdtl; i++) {
1432		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1433		prdt++;
1434	}
1435#endif
1436
1437	if (cfis[0] != FIS_TYPE_REGH2D) {
1438		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1439		return;
1440	}
1441
1442	if (cfis[1] & 0x80) {
1443		ahci_handle_cmd(p, slot, cfis);
1444	} else {
1445		if (cfis[15] & (1 << 2))
1446			p->reset = 1;
1447		else if (p->reset) {
1448			p->reset = 0;
1449			ahci_port_reset(p);
1450		}
1451		p->ci &= ~(1 << slot);
1452	}
1453}
1454
1455static void
1456ahci_handle_port(struct ahci_port *p)
1457{
1458	int i;
1459
1460	if (!(p->cmd & AHCI_P_CMD_ST))
1461		return;
1462
1463	/*
1464	 * Search for any new commands to issue ignoring those that
1465	 * are already in-flight.
1466	 */
1467	for (i = 0; (i < 32) && p->ci; i++) {
1468		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1469			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1470			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1471			ahci_handle_slot(p, i);
1472		}
1473	}
1474}
1475
1476/*
1477 * blockif callback routine - this runs in the context of the blockif
1478 * i/o thread, so the mutex needs to be acquired.
1479 */
1480static void
1481ata_ioreq_cb(struct blockif_req *br, int err)
1482{
1483	struct ahci_cmd_hdr *hdr;
1484	struct ahci_ioreq *aior;
1485	struct ahci_port *p;
1486	struct pci_ahci_softc *sc;
1487	uint32_t tfd;
1488	uint8_t *cfis;
1489	int pending, slot, ncq;
1490
1491	DPRINTF("%s %d\n", __func__, err);
1492
1493	ncq = 0;
1494	aior = br->br_param;
1495	p = aior->io_pr;
1496	cfis = aior->cfis;
1497	slot = aior->slot;
1498	pending = aior->prdtl;
1499	sc = p->pr_sc;
1500	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1501
1502	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1503			cfis[2] == ATA_READ_FPDMA_QUEUED)
1504		ncq = 1;
1505
1506	pthread_mutex_lock(&sc->mtx);
1507
1508	/*
1509	 * Delete the blockif request from the busy list
1510	 */
1511	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1512
1513	/*
1514	 * Move the blockif request back to the free list
1515	 */
1516	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1517
1518	if (pending && !err) {
1519		ahci_handle_dma(p, slot, cfis, aior->done,
1520		    hdr->prdtl - pending);
1521		goto out;
1522	}
1523
1524	if (!err && aior->done == aior->len) {
1525		tfd = ATA_S_READY | ATA_S_DSC;
1526		if (ncq)
1527			hdr->prdbc = 0;
1528		else
1529			hdr->prdbc = aior->len;
1530	} else {
1531		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1532		hdr->prdbc = 0;
1533		if (ncq)
1534			p->serr |= (1 << slot);
1535	}
1536
1537	if (ncq) {
1538		p->sact &= ~(1 << slot);
1539		ahci_write_fis_sdb(p, slot, tfd);
1540	} else
1541		ahci_write_fis_d2h(p, slot, cfis, tfd);
1542
1543	/*
1544	 * This command is now complete.
1545	 */
1546	p->pending &= ~(1 << slot);
1547
1548	ahci_check_stopped(p);
1549out:
1550	pthread_mutex_unlock(&sc->mtx);
1551	DPRINTF("%s exit\n", __func__);
1552}
1553
1554static void
1555atapi_ioreq_cb(struct blockif_req *br, int err)
1556{
1557	struct ahci_cmd_hdr *hdr;
1558	struct ahci_ioreq *aior;
1559	struct ahci_port *p;
1560	struct pci_ahci_softc *sc;
1561	uint8_t *cfis;
1562	uint32_t tfd;
1563	int pending, slot;
1564
1565	DPRINTF("%s %d\n", __func__, err);
1566
1567	aior = br->br_param;
1568	p = aior->io_pr;
1569	cfis = aior->cfis;
1570	slot = aior->slot;
1571	pending = aior->prdtl;
1572	sc = p->pr_sc;
1573	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1574
1575	pthread_mutex_lock(&sc->mtx);
1576
1577	/*
1578	 * Delete the blockif request from the busy list
1579	 */
1580	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1581
1582	/*
1583	 * Move the blockif request back to the free list
1584	 */
1585	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1586
1587	if (pending && !err) {
1588		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1589		goto out;
1590	}
1591
1592	if (!err && aior->done == aior->len) {
1593		tfd = ATA_S_READY | ATA_S_DSC;
1594		hdr->prdbc = aior->len;
1595	} else {
1596		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1597		p->asc = 0x21;
1598		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1599		hdr->prdbc = 0;
1600	}
1601
1602	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1603	ahci_write_fis_d2h(p, slot, cfis, tfd);
1604
1605	/*
1606	 * This command is now complete.
1607	 */
1608	p->pending &= ~(1 << slot);
1609
1610	ahci_check_stopped(p);
1611out:
1612	pthread_mutex_unlock(&sc->mtx);
1613	DPRINTF("%s exit\n", __func__);
1614}
1615
1616static void
1617pci_ahci_ioreq_init(struct ahci_port *pr)
1618{
1619	struct ahci_ioreq *vr;
1620	int i;
1621
1622	pr->ioqsz = blockif_queuesz(pr->bctx);
1623	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1624	STAILQ_INIT(&pr->iofhd);
1625
1626	/*
1627	 * Add all i/o request entries to the free queue
1628	 */
1629	for (i = 0; i < pr->ioqsz; i++) {
1630		vr = &pr->ioreq[i];
1631		vr->io_pr = pr;
1632		if (!pr->atapi)
1633			vr->io_req.br_callback = ata_ioreq_cb;
1634		else
1635			vr->io_req.br_callback = atapi_ioreq_cb;
1636		vr->io_req.br_param = vr;
1637		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1638	}
1639
1640	TAILQ_INIT(&pr->iobhd);
1641}
1642
1643static void
1644pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1645{
1646	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1647	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1648	struct ahci_port *p = &sc->port[port];
1649
1650	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1651		port, offset, value);
1652
1653	switch (offset) {
1654	case AHCI_P_CLB:
1655		p->clb = value;
1656		break;
1657	case AHCI_P_CLBU:
1658		p->clbu = value;
1659		break;
1660	case AHCI_P_FB:
1661		p->fb = value;
1662		break;
1663	case AHCI_P_FBU:
1664		p->fbu = value;
1665		break;
1666	case AHCI_P_IS:
1667		p->is &= ~value;
1668		break;
1669	case AHCI_P_IE:
1670		p->ie = value & 0xFDC000FF;
1671		ahci_generate_intr(sc);
1672		break;
1673	case AHCI_P_CMD:
1674	{
1675		p->cmd = value;
1676
1677		if (!(value & AHCI_P_CMD_ST)) {
1678			ahci_port_stop(p);
1679		} else {
1680			uint64_t clb;
1681
1682			p->cmd |= AHCI_P_CMD_CR;
1683			clb = (uint64_t)p->clbu << 32 | p->clb;
1684			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1685					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1686		}
1687
1688		if (value & AHCI_P_CMD_FRE) {
1689			uint64_t fb;
1690
1691			p->cmd |= AHCI_P_CMD_FR;
1692			fb = (uint64_t)p->fbu << 32 | p->fb;
1693			/* we don't support FBSCP, so rfis size is 256Bytes */
1694			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1695		} else {
1696			p->cmd &= ~AHCI_P_CMD_FR;
1697		}
1698
1699		if (value & AHCI_P_CMD_CLO) {
1700			p->tfd = 0;
1701			p->cmd &= ~AHCI_P_CMD_CLO;
1702		}
1703
1704		ahci_handle_port(p);
1705		break;
1706	}
1707	case AHCI_P_TFD:
1708	case AHCI_P_SIG:
1709	case AHCI_P_SSTS:
1710		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1711		break;
1712	case AHCI_P_SCTL:
1713		if (!(p->cmd & AHCI_P_CMD_ST)) {
1714			if (value & ATA_SC_DET_RESET)
1715				ahci_port_reset(p);
1716			p->sctl = value;
1717		}
1718		break;
1719	case AHCI_P_SERR:
1720		p->serr &= ~value;
1721		break;
1722	case AHCI_P_SACT:
1723		p->sact |= value;
1724		break;
1725	case AHCI_P_CI:
1726		p->ci |= value;
1727		ahci_handle_port(p);
1728		break;
1729	case AHCI_P_SNTF:
1730	case AHCI_P_FBS:
1731	default:
1732		break;
1733	}
1734}
1735
1736static void
1737pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1738{
1739	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1740		offset, value);
1741
1742	switch (offset) {
1743	case AHCI_CAP:
1744	case AHCI_PI:
1745	case AHCI_VS:
1746	case AHCI_CAP2:
1747		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1748		break;
1749	case AHCI_GHC:
1750		if (value & AHCI_GHC_HR)
1751			ahci_reset(sc);
1752		else if (value & AHCI_GHC_IE) {
1753			sc->ghc |= AHCI_GHC_IE;
1754			ahci_generate_intr(sc);
1755		}
1756		break;
1757	case AHCI_IS:
1758		sc->is &= ~value;
1759		ahci_generate_intr(sc);
1760		break;
1761	default:
1762		break;
1763	}
1764}
1765
1766static void
1767pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1768		int baridx, uint64_t offset, int size, uint64_t value)
1769{
1770	struct pci_ahci_softc *sc = pi->pi_arg;
1771
1772	assert(baridx == 5);
1773	assert(size == 4);
1774
1775	pthread_mutex_lock(&sc->mtx);
1776
1777	if (offset < AHCI_OFFSET)
1778		pci_ahci_host_write(sc, offset, value);
1779	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1780		pci_ahci_port_write(sc, offset, value);
1781	else
1782		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1783
1784	pthread_mutex_unlock(&sc->mtx);
1785}
1786
1787static uint64_t
1788pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1789{
1790	uint32_t value;
1791
1792	switch (offset) {
1793	case AHCI_CAP:
1794	case AHCI_GHC:
1795	case AHCI_IS:
1796	case AHCI_PI:
1797	case AHCI_VS:
1798	case AHCI_CCCC:
1799	case AHCI_CCCP:
1800	case AHCI_EM_LOC:
1801	case AHCI_EM_CTL:
1802	case AHCI_CAP2:
1803	{
1804		uint32_t *p = &sc->cap;
1805		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1806		value = *p;
1807		break;
1808	}
1809	default:
1810		value = 0;
1811		break;
1812	}
1813	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1814		offset, value);
1815
1816	return (value);
1817}
1818
1819static uint64_t
1820pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1821{
1822	uint32_t value;
1823	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1824	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1825
1826	switch (offset) {
1827	case AHCI_P_CLB:
1828	case AHCI_P_CLBU:
1829	case AHCI_P_FB:
1830	case AHCI_P_FBU:
1831	case AHCI_P_IS:
1832	case AHCI_P_IE:
1833	case AHCI_P_CMD:
1834	case AHCI_P_TFD:
1835	case AHCI_P_SIG:
1836	case AHCI_P_SSTS:
1837	case AHCI_P_SCTL:
1838	case AHCI_P_SERR:
1839	case AHCI_P_SACT:
1840	case AHCI_P_CI:
1841	case AHCI_P_SNTF:
1842	case AHCI_P_FBS:
1843	{
1844		uint32_t *p= &sc->port[port].clb;
1845		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1846		value = *p;
1847		break;
1848	}
1849	default:
1850		value = 0;
1851		break;
1852	}
1853
1854	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
1855		port, offset, value);
1856
1857	return value;
1858}
1859
1860static uint64_t
1861pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1862    uint64_t offset, int size)
1863{
1864	struct pci_ahci_softc *sc = pi->pi_arg;
1865	uint32_t value;
1866
1867	assert(baridx == 5);
1868	assert(size == 4);
1869
1870	pthread_mutex_lock(&sc->mtx);
1871
1872	if (offset < AHCI_OFFSET)
1873		value = pci_ahci_host_read(sc, offset);
1874	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1875		value = pci_ahci_port_read(sc, offset);
1876	else {
1877		value = 0;
1878		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
1879	}
1880
1881	pthread_mutex_unlock(&sc->mtx);
1882
1883	return (value);
1884}
1885
1886static int
1887pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
1888{
1889	char bident[sizeof("XX:X:X")];
1890	struct blockif_ctxt *bctxt;
1891	struct pci_ahci_softc *sc;
1892	int ret, slots;
1893
1894	ret = 0;
1895
1896	if (opts == NULL) {
1897		fprintf(stderr, "pci_ahci: backing device required\n");
1898		return (1);
1899	}
1900
1901#ifdef AHCI_DEBUG
1902	dbg = fopen("/tmp/log", "w+");
1903#endif
1904
1905	sc = calloc(1, sizeof(struct pci_ahci_softc));
1906	pi->pi_arg = sc;
1907	sc->asc_pi = pi;
1908	sc->ports = MAX_PORTS;
1909
1910	/*
1911	 * Only use port 0 for a backing device. All other ports will be
1912	 * marked as unused
1913	 */
1914	sc->port[0].atapi = atapi;
1915
1916	/*
1917	 * Attempt to open the backing image. Use the PCI
1918	 * slot/func for the identifier string.
1919	 */
1920	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
1921	bctxt = blockif_open(opts, bident);
1922	if (bctxt == NULL) {
1923		ret = 1;
1924		goto open_fail;
1925	}
1926	sc->port[0].bctx = bctxt;
1927	sc->port[0].pr_sc = sc;
1928
1929	/*
1930	 * Allocate blockif request structures and add them
1931	 * to the free list
1932	 */
1933	pci_ahci_ioreq_init(&sc->port[0]);
1934
1935	pthread_mutex_init(&sc->mtx, NULL);
1936
1937	/* Intel ICH8 AHCI */
1938	slots = sc->port[0].ioqsz;
1939	if (slots > 32)
1940		slots = 32;
1941	--slots;
1942	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
1943	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
1944	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
1945	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
1946	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
1947
1948	/* Only port 0 implemented */
1949	sc->pi = 1;
1950	sc->vs = 0x10300;
1951	sc->cap2 = AHCI_CAP2_APST;
1952	ahci_reset(sc);
1953
1954	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
1955	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
1956	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
1957	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
1958	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
1959	pci_emul_add_msicap(pi, 1);
1960	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
1961	    AHCI_OFFSET + sc->ports * AHCI_STEP);
1962
1963	pci_lintr_request(pi);
1964
1965open_fail:
1966	if (ret) {
1967		blockif_close(sc->port[0].bctx);
1968		free(sc);
1969	}
1970
1971	return (ret);
1972}
1973
/*
 * Init hook for the "ahci-hd" emulation: the shared initialiser is run
 * with the ATAPI flag cleared.
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1980
/*
 * Init hook for the "ahci-cd" emulation: the shared initialiser is run
 * with the ATAPI flag set.
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
1987
1988/*
1989 * Use separate emulation names to distinguish drive and atapi devices
1990 */
1991struct pci_devemu pci_de_ahci_hd = {
1992	.pe_emu =	"ahci-hd",
1993	.pe_init =	pci_ahci_hd_init,
1994	.pe_barwrite =	pci_ahci_write,
1995	.pe_barread =	pci_ahci_read
1996};
1997PCI_EMUL_SET(pci_de_ahci_hd);
1998
1999struct pci_devemu pci_de_ahci_cd = {
2000	.pe_emu =	"ahci-cd",
2001	.pe_init =	pci_ahci_atapi_init,
2002	.pe_barwrite =	pci_ahci_write,
2003	.pe_barread =	pci_ahci_read
2004};
2005PCI_EMUL_SET(pci_de_ahci_cd);
2006