pci_ahci.c revision 280735
1219732Sume/*-
2282746Sgjb * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3219732Sume * All rights reserved.
4219732Sume *
5219732Sume * Redistribution and use in source and binary forms, with or without
6219732Sume * modification, are permitted provided that the following conditions
7219732Sume * are met:
8219732Sume * 1. Redistributions of source code must retain the above copyright
9219732Sume *    notice, this list of conditions and the following disclaimer.
10219732Sume * 2. Redistributions in binary form must reproduce the above copyright
11219732Sume *    notice, this list of conditions and the following disclaimer in the
12219732Sume *    documentation and/or other materials provided with the distribution.
13219732Sume *
14219732Sume * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15219732Sume * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16219732Sume * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17219732Sume * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18219732Sume * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19219732Sume * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20219732Sume * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21219732Sume * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22219732Sume * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23219732Sume * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24219732Sume * SUCH DAMAGE.
25219732Sume *
26219732Sume * $FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 280735 2015-03-27 08:46:12Z mav $
27219732Sume */
28219732Sume
29219732Sume#include <sys/cdefs.h>
30219732Sume__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_ahci.c 280735 2015-03-27 08:46:12Z mav $");
31282746Sgjb
32282746Sgjb#include <sys/param.h>
33282746Sgjb#include <sys/linker_set.h>
34282746Sgjb#include <sys/stat.h>
35282746Sgjb#include <sys/uio.h>
36282746Sgjb#include <sys/ioctl.h>
37282746Sgjb#include <sys/disk.h>
38282746Sgjb#include <sys/ata.h>
39282746Sgjb#include <sys/endian.h>
40282746Sgjb
41282746Sgjb#include <errno.h>
42219732Sume#include <fcntl.h>
43219732Sume#include <stdio.h>
44219732Sume#include <stdlib.h>
45219732Sume#include <stdint.h>
46219732Sume#include <string.h>
47219732Sume#include <strings.h>
48219732Sume#include <unistd.h>
49219732Sume#include <assert.h>
50219732Sume#include <pthread.h>
51219732Sume#include <pthread_np.h>
52219732Sume#include <inttypes.h>
53282746Sgjb
54219732Sume#include "bhyverun.h"
55219732Sume#include "pci_emul.h"
56219732Sume#include "ahci.h"
57282746Sgjb#include "block_if.h"
58282746Sgjb
/* Size of the per-controller port array (Intel ICH8 AHCI supports 6 ports). */
#define	MAX_PORTS	6

/* Values stored in a port's signature register by ahci_port_reset(). */
#define	PxSIG_ATA	0x00000101	/* ATA drive */
#define	PxSIG_ATAPI	0xeb140101	/* ATAPI drive */
63282746Sgjb
/*
 * SATA Frame Information Structure (FIS) type codes; the value is the
 * first byte of the corresponding FIS.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS, host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS, device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS, device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS, bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS, bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS, bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS, device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set device bits FIS, device to host */
};
74219732Sume
/*
 * SCSI opcodes (first byte of the ATAPI command packet)
 */
#define	TEST_UNIT_READY			0x00
#define	REQUEST_SENSE			0x03
#define	INQUIRY				0x12
#define	START_STOP_UNIT			0x1B
#define	PREVENT_ALLOW			0x1E
#define	READ_CAPACITY			0x25
#define	READ_10				0x28
#define	POSITION_TO_ELEMENT		0x2B
#define	READ_TOC			0x43
#define	GET_EVENT_STATUS_NOTIFICATION	0x4A
#define	MODE_SENSE_10			0x5A
#define	READ_12				0xA8
#define	READ_CD				0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 * NOTE(review): the SF/SATA_SF names suggest these are SET FEATURES
 * sub-codes rather than top-level commands -- confirm against the user.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
104282746Sgjb
/*
 * Diagnostic output.
 *
 * DPRINTF writes to (and flushes) the file-scope stream 'dbg' when
 * AHCI_DEBUG is defined and expands to nothing otherwise.  WPRINTF always
 * prints to stdout.
 */
#ifdef AHCI_DEBUG
static FILE *dbg;
#define DPRINTF(format, ...)						\
	do{fprintf(dbg, format, ##__VA_ARGS__);fflush(dbg);}while(0)
#else
#define DPRINTF(format, ...)
#endif
#define WPRINTF(format, ...) printf(format, ##__VA_ARGS__)
115282746Sgjb
116282746Sgjbstruct ahci_ioreq {
117282746Sgjb	struct blockif_req io_req;
118219732Sume	struct ahci_port *io_pr;
119219732Sume	STAILQ_ENTRY(ahci_ioreq) io_flist;
120282746Sgjb	TAILQ_ENTRY(ahci_ioreq) io_blist;
121219732Sume	uint8_t *cfis;
122219732Sume	uint32_t len;
123219732Sume	uint32_t done;
124219732Sume	int slot;
125219732Sume	int prdtl;
126219732Sume};
127219732Sume
/*
 * Per-port emulation state.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* backing store; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* host mapping of the command list */
	uint8_t *rfis;			/* host mapping of the received-FIS area */
	int atapi;			/* non-zero for an ATAPI device */
	int reset;
	int mult_sectors;		/* reported in IDENTIFY word 59 */
	uint8_t xfermode;		/* current transfer mode (ATA_UDMA6 after reset) */
	uint8_t sense_key;		/* last ATAPI sense key */
	uint8_t asc;			/* last ATAPI additional sense code */
	uint32_t pending;		/* bitmap of command slots in flight */

	/*
	 * Port register values.
	 * NOTE(review): order presumably mirrors the AHCI per-port register
	 * block (hence unused0) -- confirm before reordering.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy requests */
};
167282746Sgjb
/*
 * Command header: one entry of a port's command list, read directly from
 * guest memory (field order and sizes must not change).
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries in the command table */
	uint32_t prdbc;		/* byte count transferred (set by write_prdt) */
	uint64_t ctba;		/* presumably the command table base address */
	uint32_t reserved[4];
};
175219732Sume
/*
 * Physical region descriptor table entry, read directly from guest memory
 * (field order and sizes must not change).
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the data buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size in bytes */
};
182219732Sume
183219732Sumestruct pci_ahci_softc {
184219732Sume	struct pci_devinst *asc_pi;
185219732Sume	pthread_mutex_t	mtx;
186219732Sume	int ports;
187219732Sume	uint32_t cap;
188219732Sume	uint32_t ghc;
189219732Sume	uint32_t is;
190219732Sume	uint32_t pi;
191219732Sume	uint32_t vs;
192219732Sume	uint32_t ccc_ctl;
193219732Sume	uint32_t ccc_pts;
194219732Sume	uint32_t em_loc;
195219732Sume	uint32_t em_ctl;
196219732Sume	uint32_t cap2;
197219732Sume	uint32_t bohc;
198219732Sume	uint32_t lintr;
199219732Sume	struct ahci_port port[MAX_PORTS];
200219732Sume};
201219732Sume#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
202219732Sume
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * The address is biased by 150 frames and split at 75 frames per second
 * and 60 seconds per minute; the result is stored as buf[0] = minute,
 * buf[1] = second, buf[2] = frame.
 */
static inline void
lba_to_msf(uint8_t *buf, int lba)
{
	const int frames = lba + 150;
	const int seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
210219732Sume
211219732Sume/*
212219732Sume * generate HBA intr depending on whether or not ports within
213282746Sgjb * the controller have an interrupt pending.
214282746Sgjb */
215282746Sgjbstatic void
216282746Sgjbahci_generate_intr(struct pci_ahci_softc *sc)
217282746Sgjb{
218282746Sgjb	struct pci_devinst *pi;
219282746Sgjb	int i;
220282746Sgjb
221282746Sgjb	pi = sc->asc_pi;
222282746Sgjb
223282746Sgjb	for (i = 0; i < sc->ports; i++) {
224282746Sgjb		struct ahci_port *pr;
225282746Sgjb		pr = &sc->port[i];
226282746Sgjb		if (pr->is & pr->ie)
227282746Sgjb			sc->is |= (1 << i);
228282746Sgjb	}
229282746Sgjb
230282746Sgjb	DPRINTF("%s %x\n", __func__, sc->is);
231282746Sgjb
232282746Sgjb	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
233282746Sgjb		if (pci_msi_enabled(pi)) {
234282746Sgjb			/*
235282746Sgjb			 * Generate an MSI interrupt on every edge
236282746Sgjb			 */
237282746Sgjb			pci_generate_msi(pi, 0);
238282746Sgjb		} else if (!sc->lintr) {
239282746Sgjb			/*
240282746Sgjb			 * Only generate a pin-based interrupt if one wasn't
241282746Sgjb			 * in progress
242282746Sgjb			 */
243282746Sgjb			sc->lintr = 1;
244282746Sgjb			pci_lintr_assert(pi);
245282746Sgjb		}
246282746Sgjb	} else if (sc->lintr) {
247282746Sgjb		/*
248219732Sume		 * No interrupts: deassert pin-based signal if it had
249219732Sume		 * been asserted
250219732Sume		 */
251219732Sume		pci_lintr_deassert(pi);
252282746Sgjb		sc->lintr = 0;
253219732Sume	}
254219732Sume}
255282746Sgjb
256282746Sgjbstatic void
257282746Sgjbahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
258282746Sgjb{
259282746Sgjb	int offset, len, irq;
260282746Sgjb
261282746Sgjb	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
262282746Sgjb		return;
263282746Sgjb
264282746Sgjb	switch (ft) {
265282746Sgjb	case FIS_TYPE_REGD2H:
266282746Sgjb		offset = 0x40;
267282746Sgjb		len = 20;
268282746Sgjb		irq = AHCI_P_IX_DHR;
269282746Sgjb		break;
270282746Sgjb	case FIS_TYPE_SETDEVBITS:
271282746Sgjb		offset = 0x58;
272282746Sgjb		len = 8;
273219732Sume		irq = AHCI_P_IX_SDB;
274219732Sume		break;
275219732Sume	case FIS_TYPE_PIOSETUP:
276282746Sgjb		offset = 0x20;
277219732Sume		len = 20;
278282746Sgjb		irq = 0;
279219732Sume		break;
280219732Sume	default:
281282746Sgjb		WPRINTF("unsupported fis type %d\n", ft);
282282746Sgjb		return;
283282746Sgjb	}
284282746Sgjb	memcpy(p->rfis + offset, fis, len);
285219732Sume	if (irq) {
286219732Sume		p->is |= irq;
287219732Sume		ahci_generate_intr(p->pr_sc);
288219732Sume	}
289219732Sume}
290282746Sgjb
291282746Sgjbstatic void
292282746Sgjbahci_write_fis_piosetup(struct ahci_port *p)
293282746Sgjb{
294282746Sgjb	uint8_t fis[20];
295219732Sume
296219732Sume	memset(fis, 0, sizeof(fis));
297219732Sume	fis[0] = FIS_TYPE_PIOSETUP;
298219732Sume	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
299282746Sgjb}
300219732Sume
301219732Sumestatic void
302219732Sumeahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
303219732Sume{
304219732Sume	uint8_t fis[8];
305219732Sume	uint8_t error;
306282746Sgjb
307219732Sume	error = (tfd >> 8) & 0xff;
308219732Sume	memset(fis, 0, sizeof(fis));
309282746Sgjb	fis[0] = error;
310219732Sume	fis[2] = tfd & 0x77;
311219732Sume	*(uint32_t *)(fis + 4) = (1 << slot);
312282746Sgjb	if (fis[2] & ATA_S_ERROR)
313219732Sume		p->is |= AHCI_P_IX_TFE;
314219732Sume	p->tfd = tfd;
315219732Sume	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
316219732Sume}
317219732Sume
318282746Sgjbstatic void
319219732Sumeahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
320219732Sume{
321219732Sume	uint8_t fis[20];
322282746Sgjb	uint8_t error;
323219732Sume
324219732Sume	error = (tfd >> 8) & 0xff;
325219732Sume	memset(fis, 0, sizeof(fis));
326219732Sume	fis[0] = FIS_TYPE_REGD2H;
327219732Sume	fis[1] = (1 << 6);
328282746Sgjb	fis[2] = tfd & 0xff;
329282746Sgjb	fis[3] = error;
330282746Sgjb	fis[4] = cfis[4];
331282746Sgjb	fis[5] = cfis[5];
332282746Sgjb	fis[6] = cfis[6];
333282746Sgjb	fis[7] = cfis[7];
334282746Sgjb	fis[8] = cfis[8];
335282746Sgjb	fis[9] = cfis[9];
336282746Sgjb	fis[10] = cfis[10];
337282746Sgjb	fis[11] = cfis[11];
338282746Sgjb	fis[12] = cfis[12];
339282746Sgjb	fis[13] = cfis[13];
340282746Sgjb	if (fis[2] & ATA_S_ERROR)
341282746Sgjb		p->is |= AHCI_P_IX_TFE;
342282746Sgjb	else
343282746Sgjb		p->ci &= ~(1 << slot);
344282746Sgjb	p->tfd = tfd;
345282746Sgjb	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
346282746Sgjb}
347282746Sgjb
348282746Sgjbstatic void
349282746Sgjbahci_write_reset_fis_d2h(struct ahci_port *p)
350282746Sgjb{
351282746Sgjb	uint8_t fis[20];
352282746Sgjb
353282746Sgjb	memset(fis, 0, sizeof(fis));
354282746Sgjb	fis[0] = FIS_TYPE_REGD2H;
355282746Sgjb	fis[3] = 1;
356282746Sgjb	fis[4] = 1;
357282746Sgjb	if (p->atapi) {
358282746Sgjb		fis[5] = 0x14;
359282746Sgjb		fis[6] = 0xeb;
360282746Sgjb	}
361282746Sgjb	fis[12] = 1;
362282746Sgjb	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
363282746Sgjb}
364282746Sgjb
365282746Sgjbstatic void
366282746Sgjbahci_check_stopped(struct ahci_port *p)
367282746Sgjb{
368282746Sgjb	/*
369282746Sgjb	 * If we are no longer processing the command list and nothing
370282746Sgjb	 * is in-flight, clear the running bit, the current command
371282746Sgjb	 * slot, the command issue and active bits.
372282746Sgjb	 */
373282746Sgjb	if (!(p->cmd & AHCI_P_CMD_ST)) {
374282746Sgjb		if (p->pending == 0) {
375282746Sgjb			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
376282746Sgjb			p->ci = 0;
377282746Sgjb			p->sact = 0;
378282746Sgjb		}
379282746Sgjb	}
380282746Sgjb}
381282746Sgjb
382282746Sgjbstatic void
383282746Sgjbahci_port_stop(struct ahci_port *p)
384282746Sgjb{
385282746Sgjb	struct ahci_ioreq *aior;
386282746Sgjb	uint8_t *cfis;
387282746Sgjb	int slot;
388282746Sgjb	int ncq;
389282746Sgjb	int error;
390282746Sgjb
391282746Sgjb	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
392282746Sgjb
393282746Sgjb	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
394282746Sgjb		/*
395282746Sgjb		 * Try to cancel the outstanding blockif request.
396282746Sgjb		 */
397282746Sgjb		error = blockif_cancel(p->bctx, &aior->io_req);
398282746Sgjb		if (error != 0)
399282746Sgjb			continue;
400282746Sgjb
401282746Sgjb		slot = aior->slot;
402282746Sgjb		cfis = aior->cfis;
403282746Sgjb		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
404282746Sgjb		    cfis[2] == ATA_READ_FPDMA_QUEUED)
405282746Sgjb			ncq = 1;
406282746Sgjb
407282746Sgjb		if (ncq)
408282746Sgjb			p->sact &= ~(1 << slot);
409282746Sgjb		else
410282746Sgjb			p->ci &= ~(1 << slot);
411282746Sgjb
412282746Sgjb		/*
413282746Sgjb		 * This command is now done.
414282746Sgjb		 */
415282746Sgjb		p->pending &= ~(1 << slot);
416282746Sgjb
417282746Sgjb		/*
418282746Sgjb		 * Delete the blockif request from the busy list
419282746Sgjb		 */
420219732Sume		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
421219732Sume
422282746Sgjb		/*
423219732Sume		 * Move the blockif request back to the free list
424282746Sgjb		 */
425282746Sgjb		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
426282746Sgjb	}
427282746Sgjb
428282746Sgjb	ahci_check_stopped(p);
429282746Sgjb}
430282746Sgjb
431282746Sgjbstatic void
432282746Sgjbahci_port_reset(struct ahci_port *pr)
433282746Sgjb{
434282746Sgjb	pr->serr = 0;
435282746Sgjb	pr->sact = 0;
436282746Sgjb	pr->xfermode = ATA_UDMA6;
437282746Sgjb	pr->mult_sectors = 128;
438282746Sgjb
439282746Sgjb	if (!pr->bctx) {
440282746Sgjb		pr->ssts = ATA_SS_DET_NO_DEVICE;
441282746Sgjb		pr->sig = 0xFFFFFFFF;
442282746Sgjb		pr->tfd = 0x7F;
443219732Sume		return;
444219732Sume	}
445219732Sume	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
446282746Sgjb	if (pr->sctl & ATA_SC_SPD_MASK)
447219732Sume		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
448219732Sume	else
449219732Sume		pr->ssts |= ATA_SS_SPD_GEN3;
450219732Sume	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
451219732Sume	if (!pr->atapi) {
452219732Sume		pr->sig = PxSIG_ATA;
453219732Sume		pr->tfd |= ATA_S_READY;
454219732Sume	} else
455219732Sume		pr->sig = PxSIG_ATAPI;
456219732Sume	ahci_write_reset_fis_d2h(pr);
457282746Sgjb}
458282746Sgjb
459282746Sgjbstatic void
460219732Sumeahci_reset(struct pci_ahci_softc *sc)
461219732Sume{
462219732Sume	int i;
463219732Sume
464219732Sume	sc->ghc = AHCI_GHC_AE;
465219732Sume	sc->is = 0;
466282746Sgjb
467282746Sgjb	if (sc->lintr) {
468282746Sgjb		pci_lintr_deassert(sc->asc_pi);
469219732Sume		sc->lintr = 0;
470282746Sgjb	}
471282746Sgjb
472282746Sgjb	for (i = 0; i < sc->ports; i++) {
473282746Sgjb		sc->port[i].ie = 0;
474282746Sgjb		sc->port[i].is = 0;
475282746Sgjb		sc->port[i].sctl = 0;
476282746Sgjb		ahci_port_reset(&sc->port[i]);
477282746Sgjb	}
478282746Sgjb}
479282746Sgjb
/*
 * Store 'src' into a 'len'-byte ATA identify string field.
 *
 * Each pair of bytes is swapped (output index i goes to dest[i ^ 1]) and
 * the field is padded with spaces once 'src' is exhausted.  Callers are
 * expected to pass an even 'len'; an odd one would make the last write
 * land at dest[len].
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos = 0;

	while (pos < len) {
		uint8_t ch = ' ';

		if (*src != '\0')
			ch = *src++;
		dest[pos ^ 1] = ch;
		pos++;
	}
}
492282746Sgjb
/*
 * Store 'src' into a 'len'-byte ATAPI string field in natural byte order,
 * padding with spaces once 'src' is exhausted.  No terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	uint8_t *out = dest;
	uint8_t *const end = dest + (len > 0 ? len : 0);

	while (out < end) {
		if (*src != '\0')
			*out++ = *src++;
		else
			*out++ = ' ';
	}
}
505219732Sume
506282746Sgjbstatic void
507219732Sumeahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
508219732Sume    int seek)
509219732Sume{
510219732Sume	struct ahci_ioreq *aior;
511219732Sume	struct blockif_req *breq;
512219732Sume	struct pci_ahci_softc *sc;
513219732Sume	struct ahci_prdt_entry *prdt;
514219732Sume	struct ahci_cmd_hdr *hdr;
515219732Sume	uint64_t lba;
516282746Sgjb	uint32_t len;
517282746Sgjb	int i, err, iovcnt, ncq, readop;
518282746Sgjb
519282746Sgjb	sc = p->pr_sc;
520282746Sgjb	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
521282746Sgjb	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
522219732Sume	ncq = 0;
523219732Sume	readop = 1;
524219732Sume
525219732Sume	prdt += seek;
526219732Sume	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
527219732Sume	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
528219732Sume	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
529282746Sgjb	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
530219732Sume		readop = 0;
531219732Sume
532219732Sume	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
533219732Sume	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
534219732Sume		lba = ((uint64_t)cfis[10] << 40) |
535219732Sume			((uint64_t)cfis[9] << 32) |
536219732Sume			((uint64_t)cfis[8] << 24) |
537219732Sume			((uint64_t)cfis[6] << 16) |
538219732Sume			((uint64_t)cfis[5] << 8) |
539219732Sume			cfis[4];
540219732Sume		len = cfis[11] << 8 | cfis[3];
541219732Sume		if (!len)
542219732Sume			len = 65536;
543282746Sgjb		ncq = 1;
544219732Sume	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
545219732Sume	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
546219732Sume	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
547219732Sume		lba = ((uint64_t)cfis[10] << 40) |
548219732Sume			((uint64_t)cfis[9] << 32) |
549219732Sume			((uint64_t)cfis[8] << 24) |
550219732Sume			((uint64_t)cfis[6] << 16) |
551219732Sume			((uint64_t)cfis[5] << 8) |
552219732Sume			cfis[4];
553219732Sume		len = cfis[13] << 8 | cfis[12];
554219732Sume		if (!len)
555219732Sume			len = 65536;
556219732Sume	} else {
557219732Sume		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
558219732Sume			(cfis[5] << 8) | cfis[4];
559282746Sgjb		len = cfis[12];
560282746Sgjb		if (!len)
561282746Sgjb			len = 256;
562282746Sgjb	}
563282746Sgjb	lba *= blockif_sectsz(p->bctx);
564282746Sgjb	len *= blockif_sectsz(p->bctx);
565282746Sgjb
566282746Sgjb	/*
567219732Sume	 * Pull request off free list
568282746Sgjb	 */
569282746Sgjb	aior = STAILQ_FIRST(&p->iofhd);
570282746Sgjb	assert(aior != NULL);
571282746Sgjb	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
572282746Sgjb	aior->cfis = cfis;
573219732Sume	aior->slot = slot;
574282746Sgjb	aior->len = len;
575282746Sgjb	aior->done = done;
576282746Sgjb	breq = &aior->io_req;
577219732Sume	breq->br_offset = lba + done;
578219732Sume	iovcnt = hdr->prdtl - seek;
579219732Sume	if (iovcnt > BLOCKIF_IOV_MAX) {
580282746Sgjb		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
581282746Sgjb		iovcnt = BLOCKIF_IOV_MAX;
582282746Sgjb	} else
583282746Sgjb		aior->prdtl = 0;
584282746Sgjb	breq->br_iovcnt = iovcnt;
585282746Sgjb
586282746Sgjb	/*
587282746Sgjb	 * Mark this command in-flight.
588282746Sgjb	 */
589219732Sume	p->pending |= 1 << slot;
590219732Sume
591219732Sume	/*
592282746Sgjb	 * Stuff request onto busy list
593282746Sgjb	 */
594282746Sgjb	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
595282746Sgjb
596282746Sgjb	/*
597282746Sgjb	 * Build up the iovec based on the prdt
598282746Sgjb	 */
599282746Sgjb	for (i = 0; i < iovcnt; i++) {
600282746Sgjb		uint32_t dbcsz;
601282746Sgjb
602282746Sgjb		dbcsz = (prdt->dbc & DBCMASK) + 1;
603282746Sgjb		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
604282746Sgjb		    prdt->dba, dbcsz);
605282746Sgjb		breq->br_iov[i].iov_len = dbcsz;
606282746Sgjb		aior->done += dbcsz;
607282746Sgjb		prdt++;
608282746Sgjb	}
609282746Sgjb	if (readop)
610282746Sgjb		err = blockif_read(p->bctx, breq);
611282746Sgjb	else
612282746Sgjb		err = blockif_write(p->bctx, breq);
613282746Sgjb	assert(err == 0);
614282746Sgjb
615282746Sgjb	if (ncq)
616282746Sgjb		p->ci &= ~(1 << slot);
617282746Sgjb}
618282746Sgjb
619282746Sgjbstatic void
620282746Sgjbahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
621282746Sgjb{
622219732Sume	struct ahci_ioreq *aior;
623225524Shrs	struct blockif_req *breq;
624282746Sgjb	int err;
625282746Sgjb
626219732Sume	/*
627219732Sume	 * Pull request off free list
628282746Sgjb	 */
629219732Sume	aior = STAILQ_FIRST(&p->iofhd);
630219732Sume	assert(aior != NULL);
631282746Sgjb	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
632282746Sgjb	aior->cfis = cfis;
633219732Sume	aior->slot = slot;
634282746Sgjb	aior->len = 0;
635282746Sgjb	aior->done = 0;
636282746Sgjb	aior->prdtl = 0;
637219732Sume	breq = &aior->io_req;
638282746Sgjb
639282746Sgjb	/*
640282746Sgjb	 * Mark this command in-flight.
641219732Sume	 */
642282746Sgjb	p->pending |= 1 << slot;
643282746Sgjb
644219732Sume	/*
645219732Sume	 * Stuff request onto busy list
646219732Sume	 */
647219732Sume	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
648219732Sume
649282746Sgjb	err = blockif_flush(p->bctx, breq);
650219732Sume	assert(err == 0);
651282746Sgjb}
652282746Sgjb
653282746Sgjbstatic inline void
654282746Sgjbread_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
655282746Sgjb		void *buf, int size)
656282746Sgjb{
657219732Sume	struct ahci_cmd_hdr *hdr;
658219732Sume	struct ahci_prdt_entry *prdt;
659219732Sume	void *to;
660219732Sume	int i, len;
661219732Sume
662219732Sume	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
663282746Sgjb	len = size;
664219732Sume	to = buf;
665219732Sume	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
666219732Sume	for (i = 0; i < hdr->prdtl && len; i++) {
667219732Sume		uint8_t *ptr;
668219732Sume		uint32_t dbcsz;
669282746Sgjb		int sublen;
670219732Sume
671219732Sume		dbcsz = (prdt->dbc & DBCMASK) + 1;
672219732Sume		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
673219732Sume		sublen = len < dbcsz ? len : dbcsz;
674282746Sgjb		memcpy(to, ptr, sublen);
675282746Sgjb		len -= sublen;
676282746Sgjb		to += sublen;
677282746Sgjb		prdt++;
678282746Sgjb	}
679282746Sgjb}
680282746Sgjb
681282746Sgjbstatic void
682282746Sgjbahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
683282746Sgjb{
684282746Sgjb	struct ahci_ioreq *aior;
685282746Sgjb	struct blockif_req *breq;
686282746Sgjb	uint8_t *entry;
687282746Sgjb	uint64_t elba;
688282746Sgjb	uint32_t len, elen;
689282746Sgjb	int err;
690282746Sgjb	uint8_t buf[512];
691282746Sgjb
692282746Sgjb	len = (uint16_t)cfis[13] << 8 | cfis[12];
693282746Sgjb	len *= 512;
694282746Sgjb	read_prdt(p, slot, cfis, buf, sizeof(buf));
695282746Sgjb
696282746Sgjbnext:
697282746Sgjb	entry = &buf[done];
698282746Sgjb	elba = ((uint64_t)entry[5] << 40) |
699282746Sgjb		((uint64_t)entry[4] << 32) |
700282746Sgjb		((uint64_t)entry[3] << 24) |
701282746Sgjb		((uint64_t)entry[2] << 16) |
702282746Sgjb		((uint64_t)entry[1] << 8) |
703282746Sgjb		entry[0];
704282746Sgjb	elen = (uint16_t)entry[7] << 8 | entry[6];
705282746Sgjb	done += 8;
706282746Sgjb	if (elen == 0) {
707282746Sgjb		if (done >= len) {
708282746Sgjb			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
709282746Sgjb			p->pending &= ~(1 << slot);
710282746Sgjb			ahci_check_stopped(p);
711282746Sgjb			return;
712282746Sgjb		}
713282746Sgjb		goto next;
714282746Sgjb	}
715282746Sgjb
716282746Sgjb	/*
717282746Sgjb	 * Pull request off free list
718282746Sgjb	 */
719282746Sgjb	aior = STAILQ_FIRST(&p->iofhd);
720282746Sgjb	assert(aior != NULL);
721282746Sgjb	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
722282746Sgjb	aior->cfis = cfis;
723282746Sgjb	aior->slot = slot;
724282746Sgjb	aior->len = len;
725282746Sgjb	aior->done = done;
726282746Sgjb	aior->prdtl = 0;
727282746Sgjb
728282746Sgjb	breq = &aior->io_req;
729282746Sgjb	breq->br_offset = elba * blockif_sectsz(p->bctx);
730282746Sgjb	breq->br_iovcnt = 1;
731282746Sgjb	breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
732282746Sgjb
733282746Sgjb	/*
734282746Sgjb	 * Mark this command in-flight.
735282746Sgjb	 */
736282746Sgjb	p->pending |= 1 << slot;
737282746Sgjb
738282746Sgjb	/*
739282746Sgjb	 * Stuff request onto busy list
740282746Sgjb	 */
741282746Sgjb	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
742282746Sgjb
743282746Sgjb	err = blockif_delete(p->bctx, breq);
744282746Sgjb	assert(err == 0);
745282746Sgjb}
746282746Sgjb
747282746Sgjbstatic inline void
748282746Sgjbwrite_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
749282746Sgjb		void *buf, int size)
750282746Sgjb{
751282746Sgjb	struct ahci_cmd_hdr *hdr;
752219732Sume	struct ahci_prdt_entry *prdt;
753219732Sume	void *from;
754219732Sume	int i, len;
755219732Sume
756219732Sume	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
757219732Sume	len = size;
758282746Sgjb	from = buf;
759282746Sgjb	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
760282746Sgjb	for (i = 0; i < hdr->prdtl && len; i++) {
761282746Sgjb		uint8_t *ptr;
762282746Sgjb		uint32_t dbcsz;
763219732Sume		int sublen;
764219732Sume
765219732Sume		dbcsz = (prdt->dbc & DBCMASK) + 1;
766282746Sgjb		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
767219732Sume		sublen = len < dbcsz ? len : dbcsz;
768219732Sume		memcpy(ptr, from, sublen);
769219732Sume		len -= sublen;
770219732Sume		from += sublen;
771219732Sume		prdt++;
772	}
773	hdr->prdbc = size - len;
774}
775
776static void
777handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
778{
779	struct ahci_cmd_hdr *hdr;
780
781	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
782	if (p->atapi || hdr->prdtl == 0) {
783		ahci_write_fis_d2h(p, slot, cfis,
784		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
785	} else {
786		uint16_t buf[256];
787		uint64_t sectors;
788		int sectsz, psectsz, psectoff, candelete, ro;
789		uint16_t cyl;
790		uint8_t sech, heads;
791
792		ro = blockif_is_ro(p->bctx);
793		candelete = blockif_candelete(p->bctx);
794		sectsz = blockif_sectsz(p->bctx);
795		sectors = blockif_size(p->bctx) / sectsz;
796		blockif_chs(p->bctx, &cyl, &heads, &sech);
797		blockif_psectsz(p->bctx, &psectsz, &psectoff);
798		memset(buf, 0, sizeof(buf));
799		buf[0] = 0x0040;
800		buf[1] = cyl;
801		buf[3] = heads;
802		buf[6] = sech;
803		/* TODO emulate different serial? */
804		ata_string((uint8_t *)(buf+10), "123456", 20);
805		ata_string((uint8_t *)(buf+23), "001", 8);
806		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
807		buf[47] = (0x8000 | 128);
808		buf[48] = 0x1;
809		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
810		buf[50] = (1 << 14);
811		buf[53] = (1 << 1 | 1 << 2);
812		if (p->mult_sectors)
813			buf[59] = (0x100 | p->mult_sectors);
814		if (sectors <= 0x0fffffff) {
815			buf[60] = sectors;
816			buf[61] = (sectors >> 16);
817		} else {
818			buf[60] = 0xffff;
819			buf[61] = 0x0fff;
820		}
821		buf[63] = 0x7;
822		if (p->xfermode & ATA_WDMA0)
823			buf[63] |= (1 << ((p->xfermode & 7) + 8));
824		buf[64] = 0x3;
825		buf[65] = 120;
826		buf[66] = 120;
827		buf[67] = 120;
828		buf[68] = 120;
829		buf[69] = 0;
830		buf[75] = 31;
831		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
832			   ATA_SUPPORT_NCQ);
833		buf[80] = 0x1f0;
834		buf[81] = 0x28;
835		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
836			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
837		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
838			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
839		buf[84] = (1 << 14);
840		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
841			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
842		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
843			   ATA_SUPPORT_FLUSHCACHE48);
844		buf[87] = (1 << 14);
845		buf[88] = 0x7f;
846		if (p->xfermode & ATA_UDMA0)
847			buf[88] |= (1 << ((p->xfermode & 7) + 8));
848		buf[93] = (1 | 1 <<14);
849		buf[100] = sectors;
850		buf[101] = (sectors >> 16);
851		buf[102] = (sectors >> 32);
852		buf[103] = (sectors >> 48);
853		if (candelete && !ro) {
854			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
855			buf[105] = 1;
856			buf[169] = ATA_SUPPORT_DSM_TRIM;
857		}
858		buf[106] = 0x4000;
859		buf[209] = 0x4000;
860		if (psectsz > sectsz) {
861			buf[106] |= 0x2000;
862			buf[106] |= ffsl(psectsz / sectsz) - 1;
863			buf[209] |= (psectoff / sectsz);
864		}
865		if (sectsz > 512) {
866			buf[106] |= 0x1000;
867			buf[117] = sectsz / 2;
868			buf[118] = ((sectsz / 2) >> 16);
869		}
870		buf[222] = 0x1020;
871		ahci_write_fis_piosetup(p);
872		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
873		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
874	}
875}
876
877static void
878handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
879{
880	if (!p->atapi) {
881		ahci_write_fis_d2h(p, slot, cfis,
882		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
883	} else {
884		uint16_t buf[256];
885
886		memset(buf, 0, sizeof(buf));
887		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
888		/* TODO emulate different serial? */
889		ata_string((uint8_t *)(buf+10), "123456", 20);
890		ata_string((uint8_t *)(buf+23), "001", 8);
891		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
892		buf[49] = (1 << 9 | 1 << 8);
893		buf[50] = (1 << 14 | 1);
894		buf[53] = (1 << 2 | 1 << 1);
895		buf[62] = 0x3f;
896		buf[63] = 7;
897		buf[64] = 3;
898		buf[65] = 100;
899		buf[66] = 100;
900		buf[67] = 100;
901		buf[68] = 100;
902		buf[76] = (1 << 2 | 1 << 1);
903		buf[78] = (1 << 5);
904		buf[80] = (0x1f << 4);
905		buf[82] = (1 << 4);
906		buf[83] = (1 << 14);
907		buf[84] = (1 << 14);
908		buf[85] = (1 << 4);
909		buf[87] = (1 << 14);
910		buf[88] = (1 << 14 | 0x7f);
911		ahci_write_fis_piosetup(p);
912		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
913		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
914	}
915}
916
917static void
918atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
919{
920	uint8_t buf[36];
921	uint8_t *acmd;
922	int len;
923
924	acmd = cfis + 0x40;
925
926	buf[0] = 0x05;
927	buf[1] = 0x80;
928	buf[2] = 0x00;
929	buf[3] = 0x21;
930	buf[4] = 31;
931	buf[5] = 0;
932	buf[6] = 0;
933	buf[7] = 0;
934	atapi_string(buf + 8, "BHYVE", 8);
935	atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
936	atapi_string(buf + 32, "001", 4);
937
938	len = sizeof(buf);
939	if (len > acmd[4])
940		len = acmd[4];
941	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
942	write_prdt(p, slot, cfis, buf, len);
943	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
944}
945
946static void
947atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
948{
949	uint8_t buf[8];
950	uint64_t sectors;
951
952	sectors = blockif_size(p->bctx) / 2048;
953	be32enc(buf, sectors - 1);
954	be32enc(buf + 4, 2048);
955	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
956	write_prdt(p, slot, cfis, buf, sizeof(buf));
957	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
958}
959
960static void
961atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
962{
963	uint8_t *acmd;
964	uint8_t format;
965	int len;
966
967	acmd = cfis + 0x40;
968
969	len = be16dec(acmd + 7);
970	format = acmd[9] >> 6;
971	switch (format) {
972	case 0:
973	{
974		int msf, size;
975		uint64_t sectors;
976		uint8_t start_track, buf[20], *bp;
977
978		msf = (acmd[1] >> 1) & 1;
979		start_track = acmd[6];
980		if (start_track > 1 && start_track != 0xaa) {
981			uint32_t tfd;
982			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
983			p->asc = 0x24;
984			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
985			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
986			ahci_write_fis_d2h(p, slot, cfis, tfd);
987			return;
988		}
989		bp = buf + 2;
990		*bp++ = 1;
991		*bp++ = 1;
992		if (start_track <= 1) {
993			*bp++ = 0;
994			*bp++ = 0x14;
995			*bp++ = 1;
996			*bp++ = 0;
997			if (msf) {
998				*bp++ = 0;
999				lba_to_msf(bp, 0);
1000				bp += 3;
1001			} else {
1002				*bp++ = 0;
1003				*bp++ = 0;
1004				*bp++ = 0;
1005				*bp++ = 0;
1006			}
1007		}
1008		*bp++ = 0;
1009		*bp++ = 0x14;
1010		*bp++ = 0xaa;
1011		*bp++ = 0;
1012		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1013		sectors >>= 2;
1014		if (msf) {
1015			*bp++ = 0;
1016			lba_to_msf(bp, sectors);
1017			bp += 3;
1018		} else {
1019			be32enc(bp, sectors);
1020			bp += 4;
1021		}
1022		size = bp - buf;
1023		be16enc(buf, size - 2);
1024		if (len > size)
1025			len = size;
1026		write_prdt(p, slot, cfis, buf, len);
1027		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1028		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1029		break;
1030	}
1031	case 1:
1032	{
1033		uint8_t buf[12];
1034
1035		memset(buf, 0, sizeof(buf));
1036		buf[1] = 0xa;
1037		buf[2] = 0x1;
1038		buf[3] = 0x1;
1039		if (len > sizeof(buf))
1040			len = sizeof(buf);
1041		write_prdt(p, slot, cfis, buf, len);
1042		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1043		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1044		break;
1045	}
1046	case 2:
1047	{
1048		int msf, size;
1049		uint64_t sectors;
1050		uint8_t start_track, *bp, buf[50];
1051
1052		msf = (acmd[1] >> 1) & 1;
1053		start_track = acmd[6];
1054		bp = buf + 2;
1055		*bp++ = 1;
1056		*bp++ = 1;
1057
1058		*bp++ = 1;
1059		*bp++ = 0x14;
1060		*bp++ = 0;
1061		*bp++ = 0xa0;
1062		*bp++ = 0;
1063		*bp++ = 0;
1064		*bp++ = 0;
1065		*bp++ = 0;
1066		*bp++ = 1;
1067		*bp++ = 0;
1068		*bp++ = 0;
1069
1070		*bp++ = 1;
1071		*bp++ = 0x14;
1072		*bp++ = 0;
1073		*bp++ = 0xa1;
1074		*bp++ = 0;
1075		*bp++ = 0;
1076		*bp++ = 0;
1077		*bp++ = 0;
1078		*bp++ = 1;
1079		*bp++ = 0;
1080		*bp++ = 0;
1081
1082		*bp++ = 1;
1083		*bp++ = 0x14;
1084		*bp++ = 0;
1085		*bp++ = 0xa2;
1086		*bp++ = 0;
1087		*bp++ = 0;
1088		*bp++ = 0;
1089		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1090		sectors >>= 2;
1091		if (msf) {
1092			*bp++ = 0;
1093			lba_to_msf(bp, sectors);
1094			bp += 3;
1095		} else {
1096			be32enc(bp, sectors);
1097			bp += 4;
1098		}
1099
1100		*bp++ = 1;
1101		*bp++ = 0x14;
1102		*bp++ = 0;
1103		*bp++ = 1;
1104		*bp++ = 0;
1105		*bp++ = 0;
1106		*bp++ = 0;
1107		if (msf) {
1108			*bp++ = 0;
1109			lba_to_msf(bp, 0);
1110			bp += 3;
1111		} else {
1112			*bp++ = 0;
1113			*bp++ = 0;
1114			*bp++ = 0;
1115			*bp++ = 0;
1116		}
1117
1118		size = bp - buf;
1119		be16enc(buf, size - 2);
1120		if (len > size)
1121			len = size;
1122		write_prdt(p, slot, cfis, buf, len);
1123		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1124		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1125		break;
1126	}
1127	default:
1128	{
1129		uint32_t tfd;
1130
1131		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1132		p->asc = 0x24;
1133		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1134		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1135		ahci_write_fis_d2h(p, slot, cfis, tfd);
1136		break;
1137	}
1138	}
1139}
1140
1141static void
1142atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
1143		uint32_t done, int seek)
1144{
1145	struct ahci_ioreq *aior;
1146	struct ahci_cmd_hdr *hdr;
1147	struct ahci_prdt_entry *prdt;
1148	struct blockif_req *breq;
1149	struct pci_ahci_softc *sc;
1150	uint8_t *acmd;
1151	uint64_t lba;
1152	uint32_t len;
1153	int i, err, iovcnt;
1154
1155	sc = p->pr_sc;
1156	acmd = cfis + 0x40;
1157	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1158	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1159
1160	prdt += seek;
1161	lba = be32dec(acmd + 2);
1162	if (acmd[0] == READ_10)
1163		len = be16dec(acmd + 7);
1164	else
1165		len = be32dec(acmd + 6);
1166	if (len == 0) {
1167		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1168		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1169	}
1170	lba *= 2048;
1171	len *= 2048;
1172
1173	/*
1174	 * Pull request off free list
1175	 */
1176	aior = STAILQ_FIRST(&p->iofhd);
1177	assert(aior != NULL);
1178	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1179	aior->cfis = cfis;
1180	aior->slot = slot;
1181	aior->len = len;
1182	aior->done = done;
1183	breq = &aior->io_req;
1184	breq->br_offset = lba + done;
1185	iovcnt = hdr->prdtl - seek;
1186	if (iovcnt > BLOCKIF_IOV_MAX) {
1187		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
1188		iovcnt = BLOCKIF_IOV_MAX;
1189	} else
1190		aior->prdtl = 0;
1191	breq->br_iovcnt = iovcnt;
1192
1193	/*
1194	 * Mark this command in-flight.
1195	 */
1196	p->pending |= 1 << slot;
1197
1198	/*
1199	 * Stuff request onto busy list
1200	 */
1201	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1202
1203	/*
1204	 * Build up the iovec based on the prdt
1205	 */
1206	for (i = 0; i < iovcnt; i++) {
1207		uint32_t dbcsz;
1208
1209		dbcsz = (prdt->dbc & DBCMASK) + 1;
1210		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
1211		    prdt->dba, dbcsz);
1212		breq->br_iov[i].iov_len = dbcsz;
1213		aior->done += dbcsz;
1214		prdt++;
1215	}
1216	err = blockif_read(p->bctx, breq);
1217	assert(err == 0);
1218}
1219
1220static void
1221atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1222{
1223	uint8_t buf[64];
1224	uint8_t *acmd;
1225	int len;
1226
1227	acmd = cfis + 0x40;
1228	len = acmd[4];
1229	if (len > sizeof(buf))
1230		len = sizeof(buf);
1231	memset(buf, 0, len);
1232	buf[0] = 0x70 | (1 << 7);
1233	buf[2] = p->sense_key;
1234	buf[7] = 10;
1235	buf[12] = p->asc;
1236	write_prdt(p, slot, cfis, buf, len);
1237	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1238	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1239}
1240
1241static void
1242atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1243{
1244	uint8_t *acmd = cfis + 0x40;
1245	uint32_t tfd;
1246
1247	switch (acmd[4] & 3) {
1248	case 0:
1249	case 1:
1250	case 3:
1251		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1252		tfd = ATA_S_READY | ATA_S_DSC;
1253		break;
1254	case 2:
1255		/* TODO eject media */
1256		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1257		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1258		p->asc = 0x53;
1259		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1260		break;
1261	}
1262	ahci_write_fis_d2h(p, slot, cfis, tfd);
1263}
1264
1265static void
1266atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1267{
1268	uint8_t *acmd;
1269	uint32_t tfd;
1270	uint8_t pc, code;
1271	int len;
1272
1273	acmd = cfis + 0x40;
1274	len = be16dec(acmd + 7);
1275	pc = acmd[2] >> 6;
1276	code = acmd[2] & 0x3f;
1277
1278	switch (pc) {
1279	case 0:
1280		switch (code) {
1281		case MODEPAGE_RW_ERROR_RECOVERY:
1282		{
1283			uint8_t buf[16];
1284
1285			if (len > sizeof(buf))
1286				len = sizeof(buf);
1287
1288			memset(buf, 0, sizeof(buf));
1289			be16enc(buf, 16 - 2);
1290			buf[2] = 0x70;
1291			buf[8] = 0x01;
1292			buf[9] = 16 - 10;
1293			buf[11] = 0x05;
1294			write_prdt(p, slot, cfis, buf, len);
1295			tfd = ATA_S_READY | ATA_S_DSC;
1296			break;
1297		}
1298		case MODEPAGE_CD_CAPABILITIES:
1299		{
1300			uint8_t buf[30];
1301
1302			if (len > sizeof(buf))
1303				len = sizeof(buf);
1304
1305			memset(buf, 0, sizeof(buf));
1306			be16enc(buf, 30 - 2);
1307			buf[2] = 0x70;
1308			buf[8] = 0x2A;
1309			buf[9] = 30 - 10;
1310			buf[10] = 0x08;
1311			buf[12] = 0x71;
1312			be16enc(&buf[18], 2);
1313			be16enc(&buf[20], 512);
1314			write_prdt(p, slot, cfis, buf, len);
1315			tfd = ATA_S_READY | ATA_S_DSC;
1316			break;
1317		}
1318		default:
1319			goto error;
1320			break;
1321		}
1322		break;
1323	case 3:
1324		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1325		p->asc = 0x39;
1326		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1327		break;
1328error:
1329	case 1:
1330	case 2:
1331		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1332		p->asc = 0x24;
1333		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1334		break;
1335	}
1336	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1337	ahci_write_fis_d2h(p, slot, cfis, tfd);
1338}
1339
1340static void
1341atapi_get_event_status_notification(struct ahci_port *p, int slot,
1342    uint8_t *cfis)
1343{
1344	uint8_t *acmd;
1345	uint32_t tfd;
1346
1347	acmd = cfis + 0x40;
1348
1349	/* we don't support asynchronous operation */
1350	if (!(acmd[1] & 1)) {
1351		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1352		p->asc = 0x24;
1353		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1354	} else {
1355		uint8_t buf[8];
1356		int len;
1357
1358		len = be16dec(acmd + 7);
1359		if (len > sizeof(buf))
1360			len = sizeof(buf);
1361
1362		memset(buf, 0, sizeof(buf));
1363		be16enc(buf, 8 - 2);
1364		buf[2] = 0x04;
1365		buf[3] = 0x10;
1366		buf[5] = 0x02;
1367		write_prdt(p, slot, cfis, buf, len);
1368		tfd = ATA_S_READY | ATA_S_DSC;
1369	}
1370	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1371	ahci_write_fis_d2h(p, slot, cfis, tfd);
1372}
1373
1374static void
1375handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1376{
1377	uint8_t *acmd;
1378
1379	acmd = cfis + 0x40;
1380
1381#ifdef AHCI_DEBUG
1382	{
1383		int i;
1384		DPRINTF("ACMD:");
1385		for (i = 0; i < 16; i++)
1386			DPRINTF("%02x ", acmd[i]);
1387		DPRINTF("\n");
1388	}
1389#endif
1390
1391	switch (acmd[0]) {
1392	case TEST_UNIT_READY:
1393		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1394		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1395		break;
1396	case INQUIRY:
1397		atapi_inquiry(p, slot, cfis);
1398		break;
1399	case READ_CAPACITY:
1400		atapi_read_capacity(p, slot, cfis);
1401		break;
1402	case PREVENT_ALLOW:
1403		/* TODO */
1404		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1405		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1406		break;
1407	case READ_TOC:
1408		atapi_read_toc(p, slot, cfis);
1409		break;
1410	case READ_10:
1411	case READ_12:
1412		atapi_read(p, slot, cfis, 0, 0);
1413		break;
1414	case REQUEST_SENSE:
1415		atapi_request_sense(p, slot, cfis);
1416		break;
1417	case START_STOP_UNIT:
1418		atapi_start_stop_unit(p, slot, cfis);
1419		break;
1420	case MODE_SENSE_10:
1421		atapi_mode_sense(p, slot, cfis);
1422		break;
1423	case GET_EVENT_STATUS_NOTIFICATION:
1424		atapi_get_event_status_notification(p, slot, cfis);
1425		break;
1426	default:
1427		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1428		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1429		p->asc = 0x20;
1430		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1431				ATA_S_READY | ATA_S_ERROR);
1432		break;
1433	}
1434}
1435
1436static void
1437ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1438{
1439
1440	switch (cfis[2]) {
1441	case ATA_ATA_IDENTIFY:
1442		handle_identify(p, slot, cfis);
1443		break;
1444	case ATA_SETFEATURES:
1445	{
1446		switch (cfis[3]) {
1447		case ATA_SF_ENAB_SATA_SF:
1448			switch (cfis[12]) {
1449			case ATA_SATA_SF_AN:
1450				p->tfd = ATA_S_DSC | ATA_S_READY;
1451				break;
1452			default:
1453				p->tfd = ATA_S_ERROR | ATA_S_READY;
1454				p->tfd |= (ATA_ERROR_ABORT << 8);
1455				break;
1456			}
1457			break;
1458		case ATA_SF_ENAB_WCACHE:
1459		case ATA_SF_DIS_WCACHE:
1460		case ATA_SF_ENAB_RCACHE:
1461		case ATA_SF_DIS_RCACHE:
1462			p->tfd = ATA_S_DSC | ATA_S_READY;
1463			break;
1464		case ATA_SF_SETXFER:
1465		{
1466			switch (cfis[12] & 0xf8) {
1467			case ATA_PIO:
1468			case ATA_PIO0:
1469				break;
1470			case ATA_WDMA0:
1471			case ATA_UDMA0:
1472				p->xfermode = (cfis[12] & 0x7);
1473				break;
1474			}
1475			p->tfd = ATA_S_DSC | ATA_S_READY;
1476			break;
1477		}
1478		default:
1479			p->tfd = ATA_S_ERROR | ATA_S_READY;
1480			p->tfd |= (ATA_ERROR_ABORT << 8);
1481			break;
1482		}
1483		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1484		break;
1485	}
1486	case ATA_SET_MULTI:
1487		if (cfis[12] != 0 &&
1488			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1489			p->tfd = ATA_S_ERROR | ATA_S_READY;
1490			p->tfd |= (ATA_ERROR_ABORT << 8);
1491		} else {
1492			p->mult_sectors = cfis[12];
1493			p->tfd = ATA_S_DSC | ATA_S_READY;
1494		}
1495		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1496		break;
1497	case ATA_READ:
1498	case ATA_WRITE:
1499	case ATA_READ48:
1500	case ATA_WRITE48:
1501	case ATA_READ_MUL:
1502	case ATA_WRITE_MUL:
1503	case ATA_READ_MUL48:
1504	case ATA_WRITE_MUL48:
1505	case ATA_READ_DMA:
1506	case ATA_WRITE_DMA:
1507	case ATA_READ_DMA48:
1508	case ATA_WRITE_DMA48:
1509	case ATA_READ_FPDMA_QUEUED:
1510	case ATA_WRITE_FPDMA_QUEUED:
1511		ahci_handle_dma(p, slot, cfis, 0, 0);
1512		break;
1513	case ATA_FLUSHCACHE:
1514	case ATA_FLUSHCACHE48:
1515		ahci_handle_flush(p, slot, cfis);
1516		break;
1517	case ATA_DATA_SET_MANAGEMENT:
1518		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1519		    cfis[13] == 0 && cfis[12] == 1) {
1520			ahci_handle_dsm_trim(p, slot, cfis, 0);
1521			break;
1522		}
1523		ahci_write_fis_d2h(p, slot, cfis,
1524		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1525		break;
1526	case ATA_STANDBY_CMD:
1527		break;
1528	case ATA_NOP:
1529	case ATA_STANDBY_IMMEDIATE:
1530	case ATA_IDLE_IMMEDIATE:
1531	case ATA_SLEEP:
1532		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1533		break;
1534	case ATA_ATAPI_IDENTIFY:
1535		handle_atapi_identify(p, slot, cfis);
1536		break;
1537	case ATA_PACKET_CMD:
1538		if (!p->atapi) {
1539			ahci_write_fis_d2h(p, slot, cfis,
1540			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1541		} else
1542			handle_packet_cmd(p, slot, cfis);
1543		break;
1544	default:
1545		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1546		ahci_write_fis_d2h(p, slot, cfis,
1547		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1548		break;
1549	}
1550}
1551
1552static void
1553ahci_handle_slot(struct ahci_port *p, int slot)
1554{
1555	struct ahci_cmd_hdr *hdr;
1556	struct ahci_prdt_entry *prdt;
1557	struct pci_ahci_softc *sc;
1558	uint8_t *cfis;
1559	int cfl;
1560
1561	sc = p->pr_sc;
1562	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1563	cfl = (hdr->flags & 0x1f) * 4;
1564	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1565			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1566	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1567
1568#ifdef AHCI_DEBUG
1569	DPRINTF("\ncfis:");
1570	for (i = 0; i < cfl; i++) {
1571		if (i % 10 == 0)
1572			DPRINTF("\n");
1573		DPRINTF("%02x ", cfis[i]);
1574	}
1575	DPRINTF("\n");
1576
1577	for (i = 0; i < hdr->prdtl; i++) {
1578		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1579		prdt++;
1580	}
1581#endif
1582
1583	if (cfis[0] != FIS_TYPE_REGH2D) {
1584		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1585		return;
1586	}
1587
1588	if (cfis[1] & 0x80) {
1589		ahci_handle_cmd(p, slot, cfis);
1590	} else {
1591		if (cfis[15] & (1 << 2))
1592			p->reset = 1;
1593		else if (p->reset) {
1594			p->reset = 0;
1595			ahci_port_reset(p);
1596		}
1597		p->ci &= ~(1 << slot);
1598	}
1599}
1600
1601static void
1602ahci_handle_port(struct ahci_port *p)
1603{
1604	int i;
1605
1606	if (!(p->cmd & AHCI_P_CMD_ST))
1607		return;
1608
1609	/*
1610	 * Search for any new commands to issue ignoring those that
1611	 * are already in-flight.
1612	 */
1613	for (i = 0; (i < 32) && p->ci; i++) {
1614		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
1615			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1616			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
1617			ahci_handle_slot(p, i);
1618		}
1619	}
1620}
1621
1622/*
1623 * blockif callback routine - this runs in the context of the blockif
1624 * i/o thread, so the mutex needs to be acquired.
1625 */
1626static void
1627ata_ioreq_cb(struct blockif_req *br, int err)
1628{
1629	struct ahci_cmd_hdr *hdr;
1630	struct ahci_ioreq *aior;
1631	struct ahci_port *p;
1632	struct pci_ahci_softc *sc;
1633	uint32_t tfd;
1634	uint8_t *cfis;
1635	int pending, slot, ncq, dsm;
1636
1637	DPRINTF("%s %d\n", __func__, err);
1638
1639	ncq = dsm = 0;
1640	aior = br->br_param;
1641	p = aior->io_pr;
1642	cfis = aior->cfis;
1643	slot = aior->slot;
1644	pending = aior->prdtl;
1645	sc = p->pr_sc;
1646	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1647
1648	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1649			cfis[2] == ATA_READ_FPDMA_QUEUED)
1650		ncq = 1;
1651	if (cfis[2] == ATA_DATA_SET_MANAGEMENT)
1652		dsm = 1;
1653
1654	pthread_mutex_lock(&sc->mtx);
1655
1656	/*
1657	 * Delete the blockif request from the busy list
1658	 */
1659	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1660
1661	/*
1662	 * Move the blockif request back to the free list
1663	 */
1664	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1665
1666	if (!err)
1667		hdr->prdbc = aior->done;
1668
1669	if (dsm) {
1670		if (aior->done != aior->len && !err) {
1671			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1672			goto out;
1673		}
1674	} else {
1675		if (pending && !err) {
1676			ahci_handle_dma(p, slot, cfis, aior->done,
1677			    hdr->prdtl - pending);
1678			goto out;
1679		}
1680	}
1681
1682	if (!err && aior->done == aior->len) {
1683		tfd = ATA_S_READY | ATA_S_DSC;
1684	} else {
1685		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1686	}
1687
1688	if (ncq) {
1689		p->sact &= ~(1 << slot);
1690		ahci_write_fis_sdb(p, slot, tfd);
1691	} else
1692		ahci_write_fis_d2h(p, slot, cfis, tfd);
1693
1694	/*
1695	 * This command is now complete.
1696	 */
1697	p->pending &= ~(1 << slot);
1698
1699	ahci_check_stopped(p);
1700out:
1701	pthread_mutex_unlock(&sc->mtx);
1702	DPRINTF("%s exit\n", __func__);
1703}
1704
1705static void
1706atapi_ioreq_cb(struct blockif_req *br, int err)
1707{
1708	struct ahci_cmd_hdr *hdr;
1709	struct ahci_ioreq *aior;
1710	struct ahci_port *p;
1711	struct pci_ahci_softc *sc;
1712	uint8_t *cfis;
1713	uint32_t tfd;
1714	int pending, slot;
1715
1716	DPRINTF("%s %d\n", __func__, err);
1717
1718	aior = br->br_param;
1719	p = aior->io_pr;
1720	cfis = aior->cfis;
1721	slot = aior->slot;
1722	pending = aior->prdtl;
1723	sc = p->pr_sc;
1724	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1725
1726	pthread_mutex_lock(&sc->mtx);
1727
1728	/*
1729	 * Delete the blockif request from the busy list
1730	 */
1731	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1732
1733	/*
1734	 * Move the blockif request back to the free list
1735	 */
1736	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1737
1738	if (!err)
1739		hdr->prdbc = aior->done;
1740
1741	if (pending && !err) {
1742		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
1743		goto out;
1744	}
1745
1746	if (!err && aior->done == aior->len) {
1747		tfd = ATA_S_READY | ATA_S_DSC;
1748	} else {
1749		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1750		p->asc = 0x21;
1751		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1752	}
1753
1754	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1755	ahci_write_fis_d2h(p, slot, cfis, tfd);
1756
1757	/*
1758	 * This command is now complete.
1759	 */
1760	p->pending &= ~(1 << slot);
1761
1762	ahci_check_stopped(p);
1763out:
1764	pthread_mutex_unlock(&sc->mtx);
1765	DPRINTF("%s exit\n", __func__);
1766}
1767
1768static void
1769pci_ahci_ioreq_init(struct ahci_port *pr)
1770{
1771	struct ahci_ioreq *vr;
1772	int i;
1773
1774	pr->ioqsz = blockif_queuesz(pr->bctx);
1775	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1776	STAILQ_INIT(&pr->iofhd);
1777
1778	/*
1779	 * Add all i/o request entries to the free queue
1780	 */
1781	for (i = 0; i < pr->ioqsz; i++) {
1782		vr = &pr->ioreq[i];
1783		vr->io_pr = pr;
1784		if (!pr->atapi)
1785			vr->io_req.br_callback = ata_ioreq_cb;
1786		else
1787			vr->io_req.br_callback = atapi_ioreq_cb;
1788		vr->io_req.br_param = vr;
1789		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1790	}
1791
1792	TAILQ_INIT(&pr->iobhd);
1793}
1794
1795static void
1796pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1797{
1798	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1799	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1800	struct ahci_port *p = &sc->port[port];
1801
1802	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1803		port, offset, value);
1804
1805	switch (offset) {
1806	case AHCI_P_CLB:
1807		p->clb = value;
1808		break;
1809	case AHCI_P_CLBU:
1810		p->clbu = value;
1811		break;
1812	case AHCI_P_FB:
1813		p->fb = value;
1814		break;
1815	case AHCI_P_FBU:
1816		p->fbu = value;
1817		break;
1818	case AHCI_P_IS:
1819		p->is &= ~value;
1820		break;
1821	case AHCI_P_IE:
1822		p->ie = value & 0xFDC000FF;
1823		ahci_generate_intr(sc);
1824		break;
1825	case AHCI_P_CMD:
1826	{
1827		p->cmd = value;
1828
1829		if (!(value & AHCI_P_CMD_ST)) {
1830			ahci_port_stop(p);
1831		} else {
1832			uint64_t clb;
1833
1834			p->cmd |= AHCI_P_CMD_CR;
1835			clb = (uint64_t)p->clbu << 32 | p->clb;
1836			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
1837					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
1838		}
1839
1840		if (value & AHCI_P_CMD_FRE) {
1841			uint64_t fb;
1842
1843			p->cmd |= AHCI_P_CMD_FR;
1844			fb = (uint64_t)p->fbu << 32 | p->fb;
1845			/* we don't support FBSCP, so rfis size is 256Bytes */
1846			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
1847		} else {
1848			p->cmd &= ~AHCI_P_CMD_FR;
1849		}
1850
1851		if (value & AHCI_P_CMD_CLO) {
1852			p->tfd = 0;
1853			p->cmd &= ~AHCI_P_CMD_CLO;
1854		}
1855
1856		ahci_handle_port(p);
1857		break;
1858	}
1859	case AHCI_P_TFD:
1860	case AHCI_P_SIG:
1861	case AHCI_P_SSTS:
1862		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
1863		break;
1864	case AHCI_P_SCTL:
1865		p->sctl = value;
1866		if (!(p->cmd & AHCI_P_CMD_ST)) {
1867			if (value & ATA_SC_DET_RESET)
1868				ahci_port_reset(p);
1869		}
1870		break;
1871	case AHCI_P_SERR:
1872		p->serr &= ~value;
1873		break;
1874	case AHCI_P_SACT:
1875		p->sact |= value;
1876		break;
1877	case AHCI_P_CI:
1878		p->ci |= value;
1879		ahci_handle_port(p);
1880		break;
1881	case AHCI_P_SNTF:
1882	case AHCI_P_FBS:
1883	default:
1884		break;
1885	}
1886}
1887
1888static void
1889pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1890{
1891	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1892		offset, value);
1893
1894	switch (offset) {
1895	case AHCI_CAP:
1896	case AHCI_PI:
1897	case AHCI_VS:
1898	case AHCI_CAP2:
1899		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
1900		break;
1901	case AHCI_GHC:
1902		if (value & AHCI_GHC_HR)
1903			ahci_reset(sc);
1904		else if (value & AHCI_GHC_IE) {
1905			sc->ghc |= AHCI_GHC_IE;
1906			ahci_generate_intr(sc);
1907		}
1908		break;
1909	case AHCI_IS:
1910		sc->is &= ~value;
1911		ahci_generate_intr(sc);
1912		break;
1913	default:
1914		break;
1915	}
1916}
1917
1918static void
1919pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
1920		int baridx, uint64_t offset, int size, uint64_t value)
1921{
1922	struct pci_ahci_softc *sc = pi->pi_arg;
1923
1924	assert(baridx == 5);
1925	assert(size == 4);
1926
1927	pthread_mutex_lock(&sc->mtx);
1928
1929	if (offset < AHCI_OFFSET)
1930		pci_ahci_host_write(sc, offset, value);
1931	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
1932		pci_ahci_port_write(sc, offset, value);
1933	else
1934		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
1935
1936	pthread_mutex_unlock(&sc->mtx);
1937}
1938
1939static uint64_t
1940pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
1941{
1942	uint32_t value;
1943
1944	switch (offset) {
1945	case AHCI_CAP:
1946	case AHCI_GHC:
1947	case AHCI_IS:
1948	case AHCI_PI:
1949	case AHCI_VS:
1950	case AHCI_CCCC:
1951	case AHCI_CCCP:
1952	case AHCI_EM_LOC:
1953	case AHCI_EM_CTL:
1954	case AHCI_CAP2:
1955	{
1956		uint32_t *p = &sc->cap;
1957		p += (offset - AHCI_CAP) / sizeof(uint32_t);
1958		value = *p;
1959		break;
1960	}
1961	default:
1962		value = 0;
1963		break;
1964	}
1965	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
1966		offset, value);
1967
1968	return (value);
1969}
1970
1971static uint64_t
1972pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
1973{
1974	uint32_t value;
1975	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1976	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1977
1978	switch (offset) {
1979	case AHCI_P_CLB:
1980	case AHCI_P_CLBU:
1981	case AHCI_P_FB:
1982	case AHCI_P_FBU:
1983	case AHCI_P_IS:
1984	case AHCI_P_IE:
1985	case AHCI_P_CMD:
1986	case AHCI_P_TFD:
1987	case AHCI_P_SIG:
1988	case AHCI_P_SSTS:
1989	case AHCI_P_SCTL:
1990	case AHCI_P_SERR:
1991	case AHCI_P_SACT:
1992	case AHCI_P_CI:
1993	case AHCI_P_SNTF:
1994	case AHCI_P_FBS:
1995	{
1996		uint32_t *p= &sc->port[port].clb;
1997		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
1998		value = *p;
1999		break;
2000	}
2001	default:
2002		value = 0;
2003		break;
2004	}
2005
2006	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2007		port, offset, value);
2008
2009	return value;
2010}
2011
2012static uint64_t
2013pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2014    uint64_t offset, int size)
2015{
2016	struct pci_ahci_softc *sc = pi->pi_arg;
2017	uint32_t value;
2018
2019	assert(baridx == 5);
2020	assert(size == 4);
2021
2022	pthread_mutex_lock(&sc->mtx);
2023
2024	if (offset < AHCI_OFFSET)
2025		value = pci_ahci_host_read(sc, offset);
2026	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2027		value = pci_ahci_port_read(sc, offset);
2028	else {
2029		value = 0;
2030		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
2031	}
2032
2033	pthread_mutex_unlock(&sc->mtx);
2034
2035	return (value);
2036}
2037
2038static int
2039pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2040{
2041	char bident[sizeof("XX:X:X")];
2042	struct blockif_ctxt *bctxt;
2043	struct pci_ahci_softc *sc;
2044	int ret, slots;
2045
2046	ret = 0;
2047
2048	if (opts == NULL) {
2049		fprintf(stderr, "pci_ahci: backing device required\n");
2050		return (1);
2051	}
2052
2053#ifdef AHCI_DEBUG
2054	dbg = fopen("/tmp/log", "w+");
2055#endif
2056
2057	sc = calloc(1, sizeof(struct pci_ahci_softc));
2058	pi->pi_arg = sc;
2059	sc->asc_pi = pi;
2060	sc->ports = MAX_PORTS;
2061
2062	/*
2063	 * Only use port 0 for a backing device. All other ports will be
2064	 * marked as unused
2065	 */
2066	sc->port[0].atapi = atapi;
2067
2068	/*
2069	 * Attempt to open the backing image. Use the PCI
2070	 * slot/func for the identifier string.
2071	 */
2072	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2073	bctxt = blockif_open(opts, bident);
2074	if (bctxt == NULL) {
2075		ret = 1;
2076		goto open_fail;
2077	}
2078	sc->port[0].bctx = bctxt;
2079	sc->port[0].pr_sc = sc;
2080
2081	/*
2082	 * Allocate blockif request structures and add them
2083	 * to the free list
2084	 */
2085	pci_ahci_ioreq_init(&sc->port[0]);
2086
2087	pthread_mutex_init(&sc->mtx, NULL);
2088
2089	/* Intel ICH8 AHCI */
2090	slots = sc->port[0].ioqsz;
2091	if (slots > 32)
2092		slots = 32;
2093	--slots;
2094	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2095	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2096	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2097	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2098	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2099
2100	/* Only port 0 implemented */
2101	sc->pi = 1;
2102	sc->vs = 0x10300;
2103	sc->cap2 = AHCI_CAP2_APST;
2104	ahci_reset(sc);
2105
2106	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2107	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2108	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2109	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2110	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2111	pci_emul_add_msicap(pi, 1);
2112	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2113	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2114
2115	pci_lintr_request(pi);
2116
2117open_fail:
2118	if (ret) {
2119		blockif_close(sc->port[0].bctx);
2120		free(sc);
2121	}
2122
2123	return (ret);
2124}
2125
/*
 * Init hook for the "ahci-hd" emulation: common AHCI setup with port 0
 * configured as a disk (atapi flag 0).
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2132
/*
 * Init hook for the "ahci-cd" emulation: common AHCI setup with port 0
 * configured as an ATAPI device (atapi flag 1).
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2139
2140/*
2141 * Use separate emulation names to distinguish drive and atapi devices
2142 */
2143struct pci_devemu pci_de_ahci_hd = {
2144	.pe_emu =	"ahci-hd",
2145	.pe_init =	pci_ahci_hd_init,
2146	.pe_barwrite =	pci_ahci_write,
2147	.pe_barread =	pci_ahci_read
2148};
2149PCI_EMUL_SET(pci_de_ahci_hd);
2150
2151struct pci_devemu pci_de_ahci_cd = {
2152	.pe_emu =	"ahci-cd",
2153	.pe_init =	pci_ahci_atapi_init,
2154	.pe_barwrite =	pci_ahci_write,
2155	.pe_barread =	pci_ahci_read
2156};
2157PCI_EMUL_SET(pci_de_ahci_cd);
2158