/*	$OpenBSD: nvme.c,v 1.117 2024/06/04 20:31:35 krw Exp $ */

/*
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/queue.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/disk.h>

#include <sys/atomic.h>

#include <machine/bus.h>

#include <scsi/scsi_all.h>
#include <scsi/scsi_disk.h>
#include <scsi/scsiconf.h>
#include <scsi/sdvar.h>

#include <dev/biovar.h>
#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
#include <dev/ic/nvmeio.h>

struct cfdriver nvme_cd = {
	NULL,
	"nvme",
	DV_DULL
};

int	nvme_ready(struct nvme_softc *, u_int32_t);
int	nvme_enable(struct nvme_softc *);
int	nvme_disable(struct nvme_softc *);
int	nvme_shutdown(struct nvme_softc *);
int	nvme_resume(struct nvme_softc *);

void	nvme_dumpregs(struct nvme_softc *);
int	nvme_identify(struct nvme_softc *, u_int);
void	nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);

int	nvme_ccbs_alloc(struct nvme_softc *, u_int);
void	nvme_ccbs_free(struct nvme_softc *, u_int);

void *	nvme_ccb_get(void *);
void	nvme_ccb_put(void *, void *);

int	nvme_poll(struct nvme_softc *, struct nvme_queue *, struct nvme_ccb *,
	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *), u_int32_t);
void	nvme_poll_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_poll_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);
void	nvme_sqe_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_empty_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);

struct nvme_queue *
	nvme_q_alloc(struct nvme_softc *, u_int16_t, u_int, u_int);
int	nvme_q_create(struct nvme_softc *, struct nvme_queue *);
int	nvme_q_reset(struct nvme_softc *, struct nvme_queue *);
int	nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
void	nvme_q_submit(struct nvme_softc *,
	    struct nvme_queue *, struct nvme_ccb *,
	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *));
int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *);
void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);

void	nvme_scsi_cmd(struct scsi_xfer *);
void	nvme_minphys(struct buf *, struct scsi_link *);
int	nvme_scsi_probe(struct scsi_link *);
void	nvme_scsi_free(struct scsi_link *);
uint64_t nvme_scsi_size(const struct nvm_identify_namespace *);
int	nvme_scsi_ioctl(struct scsi_link *, u_long, caddr_t, int);
int	nvme_passthrough_cmd(struct nvme_softc *, struct nvme_pt_cmd *,
	int, int);

#ifdef HIBERNATE
#include <uvm/uvm_extern.h>
#include <sys/hibernate.h>
#include <sys/disklabel.h>

int	nvme_hibernate_io(dev_t, daddr_t, vaddr_t, size_t, int, void *);
#endif

#if NBIO > 0
void	nvme_bio_status(struct bio_status *, const char *, ...);

const char *nvme_bioctl_sdname(const struct nvme_softc *, int);

int	nvme_bioctl(struct device *, u_long, caddr_t);
int	nvme_bioctl_inq(struct nvme_softc *, struct bioc_inq *);
int	nvme_bioctl_vol(struct nvme_softc *, struct bioc_vol *);
int	nvme_bioctl_disk(struct nvme_softc *, struct bioc_disk *);
#endif	/* NBIO > 0 */

const struct scsi_adapter nvme_switch = {
	nvme_scsi_cmd, nvme_minphys, nvme_scsi_probe, nvme_scsi_free,
	nvme_scsi_ioctl
};

void	nvme_scsi_io(struct scsi_xfer *, int);
void	nvme_scsi_io_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_scsi_io_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);

void	nvme_scsi_sync(struct scsi_xfer *);
void	nvme_scsi_sync_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_scsi_sync_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);

void	nvme_scsi_inq(struct scsi_xfer *);
void	nvme_scsi_inquiry(struct scsi_xfer *);
void	nvme_scsi_capacity16(struct scsi_xfer *);
void	nvme_scsi_capacity(struct scsi_xfer *);

uint32_t	nvme_op_sq_enter(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);
void		nvme_op_sq_leave(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);
uint32_t	nvme_op_sq_enter_locked(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);
void		nvme_op_sq_leave_locked(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);

void		nvme_op_cq_done(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);

static const struct nvme_ops nvme_ops = {
	.op_sq_enter		= nvme_op_sq_enter,
	.op_sq_leave		= nvme_op_sq_leave,
	.op_sq_enter_locked	= nvme_op_sq_enter_locked,
	.op_sq_leave_locked	= nvme_op_sq_leave_locked,

	.op_cq_done		= nvme_op_cq_done,
};

#define NVME_TIMO_QOP			5000	/* ms to create/delete queue */
#define NVME_TIMO_PT			5000	/* ms to complete passthrough */
#define NVME_TIMO_IDENT			10000	/* ms to probe/identify */
#define NVME_TIMO_DELAYNS		10	/* ns to delay() in poll loop */

/*
 * Some controllers, at least Apple NVMe, always require split
 * transfers, so don't use bus_space_{read,write}_8() on LP64.
 */
u_int64_t
nvme_read8(struct nvme_softc *sc, bus_size_t r)
{
	u_int64_t v;

	v = (u_int64_t)nvme_read4(sc, r) |
	    (u_int64_t)nvme_read4(sc, r + 4) << 32;

	return (v);
}

void
nvme_write8(struct nvme_softc *sc, bus_size_t r, u_int64_t v)
{
	nvme_write4(sc, r, v);
	nvme_write4(sc, r + 4, v >> 32);
}

void
nvme_dumpregs(struct nvme_softc *sc)
{
	u_int64_t r8;
	u_int32_t r4;

	r8 = nvme_read8(sc, NVME_CAP);
	printf("%s: cap  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP));
	printf("%s:  mpsmax %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
	printf("%s:  mpsmin %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
	printf("%s:  css %llu\n", DEVNAME(sc), NVME_CAP_CSS(r8));
	printf("%s:  nssrs %llu\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
	printf("%s:  dstrd %u\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
	printf("%s:  to %llu msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
	printf("%s:  ams %llu\n", DEVNAME(sc), NVME_CAP_AMS(r8));
	printf("%s:  cqr %llu\n", DEVNAME(sc), NVME_CAP_CQR(r8));
	printf("%s:  mqes %llu\n", DEVNAME(sc), NVME_CAP_MQES(r8));

	printf("%s: vs   0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));

	r4 = nvme_read4(sc, NVME_CC);
	printf("%s: cc   0x%04x\n", DEVNAME(sc), r4);
	printf("%s:  iocqes %u\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4));
	printf("%s:  iosqes %u\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4));
	printf("%s:  shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
	printf("%s:  ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
	printf("%s:  mps %u\n", DEVNAME(sc), NVME_CC_MPS_R(r4));
	printf("%s:  css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
	printf("%s:  en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN));

	printf("%s: csts 0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_CSTS));
	printf("%s: aqa  0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_AQA));
	printf("%s: asq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
	printf("%s: acq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
}

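/*
 * Wait for CSTS.RDY to match the requested state, polling once per
 * millisecond for up to sc_rdy_to iterations.  Returns non-zero on timeout.
 */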
int
nvme_ready(struct nvme_softc *sc, u_int32_t rdy)
{
	u_int i = 0;

	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
		if (i++ > sc->sc_rdy_to)
			return (1);

		delay(1000);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	return (0);
}

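/*
 * Program the admin queue registers (AQA, ASQ, ACQ), configure CC with the
 * queue entry sizes, memory page size and round-robin arbitration, then set
 * CC.EN and wait for the controller to report ready.
 */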
int
nvme_enable(struct nvme_softc *sc)
{
	u_int32_t cc;

	cc = nvme_read4(sc, NVME_CC);
	if (ISSET(cc, NVME_CC_EN))
		return (nvme_ready(sc, NVME_CSTS_RDY));

	if (sc->sc_ops->op_enable != NULL)
		sc->sc_ops->op_enable(sc);

	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);

	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);

	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
	SET(cc, NVME_CC_IOSQES(6));	/* Submission queue entry size == 2**6 (64) */
	SET(cc, NVME_CC_IOCQES(4));	/* Completion queue entry size == 2**4 (16) */
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
	SET(cc, NVME_CC_MPS(ffs(sc->sc_mps) - 1));
	SET(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

	return (nvme_ready(sc, NVME_CSTS_RDY));
}

int
nvme_disable(struct nvme_softc *sc)
{
	u_int32_t cc, csts;

	cc = nvme_read4(sc, NVME_CC);
	if (ISSET(cc, NVME_CC_EN)) {
		csts = nvme_read4(sc, NVME_CSTS);
		if (!ISSET(csts, NVME_CSTS_CFS) &&
		    nvme_ready(sc, NVME_CSTS_RDY) != 0)
			return (1);
	}

	CLR(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

	return (nvme_ready(sc, 0));
}

int
nvme_attach(struct nvme_softc *sc)
{
	struct scsibus_attach_args saa;
	u_int64_t cap;
	u_int32_t reg;
	u_int nccbs = 0;

	mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
	rw_init(&sc->sc_lock, "nvme_lock");
	SIMPLEQ_INIT(&sc->sc_ccb_list);
	scsi_iopool_init(&sc->sc_iopool, sc, nvme_ccb_get, nvme_ccb_put);
	if (sc->sc_ops == NULL)
		sc->sc_ops = &nvme_ops;
	if (sc->sc_openings == 0)
		sc->sc_openings = 64;

	reg = nvme_read4(sc, NVME_VS);
	if (reg == 0xffffffff) {
		printf("invalid mapping\n");
		return (1);
	}

	printf("NVMe %d.%d\n", NVME_VS_MJR(reg), NVME_VS_MNR(reg));

	cap = nvme_read8(sc, NVME_CAP);
	sc->sc_dstrd = NVME_CAP_DSTRD(cap);
	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
		printf("%s: NVMe minimum page size %u "
		    "is greater than CPU page size %u\n", DEVNAME(sc),
		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
		return (1);
	}
	if (NVME_CAP_MPSMAX(cap) < PAGE_SHIFT)
		sc->sc_mps = 1 << NVME_CAP_MPSMAX(cap);
	else
		sc->sc_mps = 1 << PAGE_SHIFT;

	sc->sc_rdy_to = NVME_CAP_TO(cap);
	sc->sc_mdts = MAXPHYS;
	sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;

	if (nvme_disable(sc) != 0) {
		printf("%s: unable to disable controller\n", DEVNAME(sc));
		return (1);
	}

	sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, 128, sc->sc_dstrd);
	if (sc->sc_admin_q == NULL) {
		printf("%s: unable to allocate admin queue\n", DEVNAME(sc));
		return (1);
	}

	if (nvme_ccbs_alloc(sc, 16) != 0) {
		printf("%s: unable to allocate initial ccbs\n", DEVNAME(sc));
		goto free_admin_q;
	}
	nccbs = 16;

	if (nvme_enable(sc) != 0) {
		printf("%s: unable to enable controller\n", DEVNAME(sc));
		goto free_ccbs;
	}

	if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
		printf("%s: unable to identify controller\n", DEVNAME(sc));
		goto disable;
	}

	/* We now know the real values of sc_mdts and sc_max_prpl. */
	nvme_ccbs_free(sc, nccbs);
	if (nvme_ccbs_alloc(sc, 64) != 0) {
		printf("%s: unable to allocate ccbs\n", DEVNAME(sc));
		goto free_admin_q;
	}
	nccbs = 64;

	sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
	if (sc->sc_q == NULL) {
		printf("%s: unable to allocate io q\n", DEVNAME(sc));
		goto disable;
	}

	if (nvme_q_create(sc, sc->sc_q) != 0) {
		printf("%s: unable to create io q\n", DEVNAME(sc));
		goto free_q;
	}

#ifdef HIBERNATE
	sc->sc_hib_q = nvme_q_alloc(sc, NVME_HIB_Q, 4, sc->sc_dstrd);
	if (sc->sc_hib_q == NULL) {
		printf("%s: unable to allocate hibernate io queue\n", DEVNAME(sc));
		goto free_q;
	}
#endif

	nvme_write4(sc, NVME_INTMC, 1);

	sc->sc_namespaces = mallocarray(sc->sc_nn + 1,
	    sizeof(*sc->sc_namespaces), M_DEVBUF, M_WAITOK|M_ZERO);

	saa.saa_adapter = &nvme_switch;
	saa.saa_adapter_softc = sc;
	saa.saa_adapter_buswidth = sc->sc_nn + 1;
	saa.saa_luns = 1;
	saa.saa_adapter_target = 0;
	saa.saa_openings = sc->sc_openings;
	saa.saa_pool = &sc->sc_iopool;
	saa.saa_quirks = saa.saa_flags = 0;
	saa.saa_wwpn = saa.saa_wwnn = 0;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
#if NBIO > 0
	if (bio_register(&sc->sc_dev, nvme_bioctl) != 0)
		printf("%s: unable to register bioctl\n", DEVNAME(sc));
#endif	/* NBIO > 0 */

	return (0);

free_q:
	nvme_q_free(sc, sc->sc_q);
disable:
	nvme_disable(sc);
free_ccbs:
	nvme_ccbs_free(sc, nccbs);
free_admin_q:
	nvme_q_free(sc, sc->sc_admin_q);

	return (1);
}

int
nvme_resume(struct nvme_softc *sc)
{
	if (nvme_disable(sc) != 0) {
		printf("%s: unable to disable controller\n", DEVNAME(sc));
		return (1);
	}

	if (nvme_q_reset(sc, sc->sc_admin_q) != 0) {
		printf("%s: unable to reset admin queue\n", DEVNAME(sc));
		return (1);
	}

	if (nvme_enable(sc) != 0) {
		printf("%s: unable to enable controller\n", DEVNAME(sc));
		return (1);
	}

	sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
	if (sc->sc_q == NULL) {
		printf("%s: unable to allocate io q\n", DEVNAME(sc));
		goto disable;
	}

	if (nvme_q_create(sc, sc->sc_q) != 0) {
		printf("%s: unable to create io q\n", DEVNAME(sc));
		goto free_q;
	}

	nvme_write4(sc, NVME_INTMC, 1);

	return (0);

free_q:
	nvme_q_free(sc, sc->sc_q);
disable:
	nvme_disable(sc);

	return (1);
}

int
nvme_scsi_probe(struct scsi_link *link)
{
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvme_sqe sqe;
	struct nvm_identify_namespace *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	int rv;

	ccb = scsi_io_get(&sc->sc_iopool, 0);
	KASSERT(ccb != NULL);

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL)
		return (ENOMEM);

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_IDENTIFY;
	htolem32(&sqe.nsid, link->target);
	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
	htolem32(&sqe.cdw10, 0);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT);
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	scsi_io_put(&sc->sc_iopool, ccb);

	identify = NVME_DMA_KVA(mem);
	if (rv == 0) {
		if (nvme_scsi_size(identify) > 0) {
			/* Commit namespace if it has a size greater than zero. */
			identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK);
			memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));
			sc->sc_namespaces[link->target].ident = identify;
		} else {
			/* Don't attach a namespace if its size is zero. */
			rv = ENXIO;
		}
	}

	nvme_dmamem_free(sc, mem);

	return (rv);
}

int
nvme_shutdown(struct nvme_softc *sc)
{
	u_int32_t cc, csts;
	int i;

	nvme_write4(sc, NVME_INTMC, 0);

	if (nvme_q_delete(sc, sc->sc_q) != 0) {
		printf("%s: unable to delete q, disabling\n", DEVNAME(sc));
		goto disable;
	}

	cc = nvme_read4(sc, NVME_CC);
	CLR(cc, NVME_CC_SHN_MASK);
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
	nvme_write4(sc, NVME_CC, cc);

	for (i = 0; i < 4000; i++) {
		nvme_barrier(sc, 0, sc->sc_ios,
		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
		csts = nvme_read4(sc, NVME_CSTS);
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
			return (0);

		delay(1000);
	}

	printf("%s: unable to shutdown, disabling\n", DEVNAME(sc));

disable:
	nvme_disable(sc);
	return (0);
}

int
nvme_activate(struct nvme_softc *sc, int act)
{
	int rv;

	switch (act) {
	case DVACT_POWERDOWN:
		rv = config_activate_children(&sc->sc_dev, act);
		nvme_shutdown(sc);
		break;
	case DVACT_RESUME:
		rv = nvme_resume(sc);
		if (rv == 0)
			rv = config_activate_children(&sc->sc_dev, act);
		break;
	default:
		rv = config_activate_children(&sc->sc_dev, act);
		break;
	}

	return (rv);
}

void
nvme_scsi_cmd(struct scsi_xfer *xs)
{
	switch (xs->cmd.opcode) {
	case READ_COMMAND:
	case READ_10:
	case READ_12:
	case READ_16:
		nvme_scsi_io(xs, SCSI_DATA_IN);
		return;
	case WRITE_COMMAND:
	case WRITE_10:
	case WRITE_12:
	case WRITE_16:
		nvme_scsi_io(xs, SCSI_DATA_OUT);
		return;

	case SYNCHRONIZE_CACHE:
		nvme_scsi_sync(xs);
		return;

	case INQUIRY:
		nvme_scsi_inq(xs);
		return;
	case READ_CAPACITY_16:
		nvme_scsi_capacity16(xs);
		return;
	case READ_CAPACITY:
		nvme_scsi_capacity(xs);
		return;

	case TEST_UNIT_READY:
	case PREVENT_ALLOW:
	case START_STOP:
		xs->error = XS_NOERROR;
		scsi_done(xs);
		return;

	default:
		break;
	}

	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

void
nvme_minphys(struct buf *bp, struct scsi_link *link)
{
	struct nvme_softc *sc = link->bus->sb_adapter_softc;

	if (bp->b_bcount > sc->sc_mdts)
		bp->b_bcount = sc->sc_mdts;
}

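/*
 * Issue a read or write.  The transfer is DMA mapped here; if it needs more
 * than the two PRP entries available in the SQE, the remaining segment
 * addresses are written into this ccb's slice of the shared PRP list before
 * the command itself is filled in by nvme_scsi_io_fill().
 */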
void
nvme_scsi_io(struct scsi_xfer *xs, int dir)
{
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvme_ccb *ccb = xs->io;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	int i;

	if ((xs->flags & (SCSI_DATA_IN|SCSI_DATA_OUT)) != dir)
		goto stuffup;

	ccb->ccb_done = nvme_scsi_io_done;
	ccb->ccb_cookie = xs;

	if (bus_dmamap_load(sc->sc_dmat, dmap,
	    xs->data, xs->datalen, NULL, ISSET(xs->flags, SCSI_NOSLEEP) ?
	    BUS_DMA_NOWAIT : BUS_DMA_WAITOK) != 0)
		goto stuffup;

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(xs->flags, SCSI_DATA_IN) ?
	    BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);

	if (dmap->dm_nsegs > 2) {
		for (i = 1; i < dmap->dm_nsegs; i++) {
			htolem64(&ccb->ccb_prpl[i - 1],
			    dmap->dm_segs[i].ds_addr);
		}
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(sc->sc_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_PREWRITE);
	}

	if (ISSET(xs->flags, SCSI_POLL)) {
		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_io_fill, xs->timeout);
		return;
	}

	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
	return;

stuffup:
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

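/*
 * Fill the submission queue entry for an I/O: a single segment fits in
 * prp[0], two segments use prp[0] and prp[1], and anything larger points
 * prp[1] at the ccb's PRP list set up in nvme_scsi_io().
 */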
void
nvme_scsi_io_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe_io *sqe = slot;
	struct scsi_xfer *xs = ccb->ccb_cookie;
	struct scsi_link *link = xs->sc_link;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	u_int64_t lba;
	u_int32_t blocks;

	scsi_cmd_rw_decode(&xs->cmd, &lba, &blocks);

	sqe->opcode = ISSET(xs->flags, SCSI_DATA_IN) ?
	    NVM_CMD_READ : NVM_CMD_WRITE;
	htolem32(&sqe->nsid, link->target);

	htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
	switch (dmap->dm_nsegs) {
	case 1:
		break;
	case 2:
		htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
		break;
	default:
		/* the prp list is already set up and synced */
		htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
		break;
	}

	htolem64(&sqe->slba, lba);
	htolem16(&sqe->nlb, blocks - 1);
}

void
nvme_scsi_io_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct scsi_xfer *xs = ccb->ccb_cookie;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	u_int16_t flags;

	if (dmap->dm_nsegs > 2) {
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(sc->sc_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_POSTWRITE);
	}

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(xs->flags, SCSI_DATA_IN) ?
	    BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);

	bus_dmamap_unload(sc->sc_dmat, dmap);

	flags = lemtoh16(&cqe->flags);

	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
	    XS_NOERROR : XS_DRIVER_STUFFUP;
	xs->status = SCSI_OK;
	xs->resid = 0;
	scsi_done(xs);
}

void
nvme_scsi_sync(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvme_ccb *ccb = xs->io;

	ccb->ccb_done = nvme_scsi_sync_done;
	ccb->ccb_cookie = xs;

	if (ISSET(xs->flags, SCSI_POLL)) {
		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_sync_fill, xs->timeout);
		return;
	}

	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
}

void
nvme_scsi_sync_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct scsi_xfer *xs = ccb->ccb_cookie;
	struct scsi_link *link = xs->sc_link;

	sqe->opcode = NVM_CMD_FLUSH;
	htolem32(&sqe->nsid, link->target);
}

void
nvme_scsi_sync_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct scsi_xfer *xs = ccb->ccb_cookie;
	u_int16_t flags;

	flags = lemtoh16(&cqe->flags);

	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
	    XS_NOERROR : XS_DRIVER_STUFFUP;
	xs->status = SCSI_OK;
	xs->resid = 0;
	scsi_done(xs);
}

void
nvme_scsi_inq(struct scsi_xfer *xs)
{
	struct scsi_inquiry *inq = (struct scsi_inquiry *)&xs->cmd;

	if (!ISSET(inq->flags, SI_EVPD)) {
		nvme_scsi_inquiry(xs);
		return;
	}

	switch (inq->pagecode) {
	default:
		/* printf("%s: %d\n", __func__, inq->pagecode); */
		break;
	}

	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

void
nvme_scsi_inquiry(struct scsi_xfer *xs)
{
	struct scsi_inquiry_data inq;
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *ns;

	ns = sc->sc_namespaces[link->target].ident;

	memset(&inq, 0, sizeof(inq));

	inq.device = T_DIRECT;
	inq.version = SCSI_REV_SPC4;
	inq.response_format = SID_SCSI2_RESPONSE;
	inq.additional_length = SID_SCSI2_ALEN;
	inq.flags |= SID_CmdQue;
	memcpy(inq.vendor, "NVMe    ", sizeof(inq.vendor));
	memcpy(inq.product, sc->sc_identify.mn, sizeof(inq.product));
	memcpy(inq.revision, sc->sc_identify.fr, sizeof(inq.revision));

	scsi_copy_internal_data(xs, &inq, sizeof(inq));

	xs->error = XS_NOERROR;
	scsi_done(xs);
}

void
nvme_scsi_capacity16(struct scsi_xfer *xs)
{
	struct scsi_read_cap_data_16 rcd;
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *ns;
	struct nvm_namespace_format *f;
	u_int64_t addr;
	u_int16_t tpe = READ_CAP_16_TPE;

	ns = sc->sc_namespaces[link->target].ident;

	if (xs->cmdlen != sizeof(struct scsi_read_capacity_16)) {
		xs->error = XS_DRIVER_STUFFUP;
		scsi_done(xs);
		return;
	}

	addr = nvme_scsi_size(ns) - 1;
	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

	memset(&rcd, 0, sizeof(rcd));
	_lto8b(addr, rcd.addr);
	_lto4b(1 << f->lbads, rcd.length);
	_lto2b(tpe, rcd.lowest_aligned);

	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));

	xs->error = XS_NOERROR;
	scsi_done(xs);
}

void
nvme_scsi_capacity(struct scsi_xfer *xs)
{
	struct scsi_read_cap_data rcd;
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *ns;
	struct nvm_namespace_format *f;
	u_int64_t addr;

	ns = sc->sc_namespaces[link->target].ident;

	if (xs->cmdlen != sizeof(struct scsi_read_capacity)) {
		xs->error = XS_DRIVER_STUFFUP;
		scsi_done(xs);
		return;
	}

	addr = nvme_scsi_size(ns) - 1;
	if (addr > 0xffffffff)
		addr = 0xffffffff;

	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

	memset(&rcd, 0, sizeof(rcd));
	_lto4b(addr, rcd.addr);
	_lto4b(1 << f->lbads, rcd.length);

	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));

	xs->error = XS_NOERROR;
	scsi_done(xs);
}

void
nvme_scsi_free(struct scsi_link *link)
{
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *identify;

	identify = sc->sc_namespaces[link->target].ident;
	sc->sc_namespaces[link->target].ident = NULL;

	free(identify, M_DEVBUF, sizeof(*identify));
}

uint64_t
nvme_scsi_size(const struct nvm_identify_namespace *ns)
{
	uint64_t		ncap, nsze;

	ncap = lemtoh64(&ns->ncap); /* Max allowed allocation. */
	nsze = lemtoh64(&ns->nsze);

	if ((ns->nsfeat & NVME_ID_NS_NSFEAT_THIN_PROV) && ncap < nsze)
		return ncap;
	else
		return nsze;
}

int
nvme_passthrough_cmd(struct nvme_softc *sc, struct nvme_pt_cmd *pt, int dv_unit,
    int nsid)
{
	struct nvme_pt_status		 pt_status;
	struct nvme_sqe			 sqe;
	struct nvme_dmamem		*mem = NULL;
	struct nvme_ccb			*ccb = NULL;
	int				 flags;
	int				 rv = 0;

	ccb = nvme_ccb_get(sc);
	if (ccb == NULL)
		panic("nvme_passthrough_cmd: nvme_ccb_get returned NULL");

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = pt->pt_opcode;
	htolem32(&sqe.nsid, pt->pt_nsid);
	htolem32(&sqe.cdw10, pt->pt_cdw10);
	htolem32(&sqe.cdw11, pt->pt_cdw11);
	htolem32(&sqe.cdw12, pt->pt_cdw12);
	htolem32(&sqe.cdw13, pt->pt_cdw13);
	htolem32(&sqe.cdw14, pt->pt_cdw14);
	htolem32(&sqe.cdw15, pt->pt_cdw15);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	switch (pt->pt_opcode) {
	case NVM_ADMIN_IDENTIFY:
	case NVM_ADMIN_GET_LOG_PG:
	case NVM_ADMIN_SELFTEST:
		break;

	default:
		rv = ENOTTY;
		goto done;
	}

	if (pt->pt_databuflen > 0) {
		mem = nvme_dmamem_alloc(sc, pt->pt_databuflen);
		if (mem == NULL) {
			rv = ENOMEM;
			goto done;
		}
		htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
		nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	}

	flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_PT);

	if (pt->pt_databuflen > 0) {
		nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
		if (flags == 0)
			rv = copyout(NVME_DMA_KVA(mem), pt->pt_databuf,
			    pt->pt_databuflen);
	}

	if (rv == 0 && pt->pt_statuslen > 0) {
		pt_status.ps_dv_unit = dv_unit;
		pt_status.ps_nsid = nsid;
		pt_status.ps_flags = flags;
		pt_status.ps_cc = nvme_read4(sc, NVME_CC);
		pt_status.ps_csts = nvme_read4(sc, NVME_CSTS);
		rv = copyout(&pt_status, pt->pt_status, pt->pt_statuslen);
	}

 done:
	if (mem)
		nvme_dmamem_free(sc, mem);
	if (ccb)
		nvme_ccb_put(sc, ccb);

	return rv;
}

int
nvme_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
{
	struct nvme_softc		*sc = link->bus->sb_adapter_softc;
	struct nvme_pt_cmd		*pt = (struct nvme_pt_cmd *)addr;
	int				 rv;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		break;
	default:
		return ENOTTY;
	}

	if ((pt->pt_cdw10 & 0xff) == 0)
		pt->pt_nsid = link->target;

	rv = nvme_passthrough_cmd(sc, pt, sc->sc_dev.dv_unit, link->target);
	if (rv)
		goto done;

 done:
	return rv;
}

uint32_t
nvme_op_sq_enter(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	mtx_enter(&q->q_sq_mtx);
	return (nvme_op_sq_enter_locked(sc, q, ccb));
}

uint32_t
nvme_op_sq_enter_locked(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	return (q->q_sq_tail);
}

void
nvme_op_sq_leave_locked(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	uint32_t tail;

	tail = ++q->q_sq_tail;
	if (tail >= q->q_entries)
		tail = 0;
	q->q_sq_tail = tail;
	nvme_write4(sc, q->q_sqtdbl, tail);
}

void
nvme_op_sq_leave(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	nvme_op_sq_leave_locked(sc, q, ccb);
	mtx_leave(&q->q_sq_mtx);
}

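/*
 * Copy a freshly filled submission queue entry into the ring slot at the
 * current tail and advance the tail doorbell via the op_sq_leave hook.
 */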
void
nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *))
{
	struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
	u_int32_t tail;

	tail = sc->sc_ops->op_sq_enter(sc, q, ccb);

	sqe += tail;

	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
	memset(sqe, 0, sizeof(*sqe));
	(*fill)(sc, ccb, sqe);
	sqe->cid = ccb->ccb_id;
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	sc->sc_ops->op_sq_leave(sc, q, ccb);
}

struct nvme_poll_state {
	struct nvme_sqe s;
	struct nvme_cqe c;
};

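/*
 * Run a command synchronously.  The caller's fill routine is captured into a
 * local SQE, the ccb's done handler is temporarily redirected to
 * nvme_poll_done(), and the completion queue is polled until the copied CQE's
 * phase bit indicates the command has finished (or the timeout expires).
 */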
int
nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *), u_int32_t ms)
{
	struct nvme_poll_state state;
	void (*done)(struct nvme_softc *, struct nvme_ccb *, struct nvme_cqe *);
	void *cookie;
	int64_t us;
	u_int16_t flags;

	memset(&state, 0, sizeof(state));
	(*fill)(sc, ccb, &state.s);

	done = ccb->ccb_done;
	cookie = ccb->ccb_cookie;

	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
	for (us = ms * 1000; ms == 0 || us > 0; us -= NVME_TIMO_DELAYNS) {
		if (ISSET(state.c.flags, htole16(NVME_CQE_PHASE)))
			break;
		if (nvme_q_complete(sc, q) == 0)
			delay(NVME_TIMO_DELAYNS);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	ccb->ccb_cookie = cookie;
	done(sc, ccb, &state.c);

	flags = lemtoh16(&state.c.flags);

	return (flags & ~NVME_CQE_PHASE);
}

void
nvme_poll_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_poll_state *state = ccb->ccb_cookie;

	*sqe = state->s;
}

void
nvme_poll_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_poll_state *state = ccb->ccb_cookie;

	state->c = *cqe;
	SET(state->c.flags, htole16(NVME_CQE_PHASE));
}

void
nvme_sqe_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *src = ccb->ccb_cookie;
	struct nvme_sqe *dst = slot;

	*dst = *src;
}

void
nvme_empty_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}

void
nvme_op_cq_done(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	/* nop */
}

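/*
 * Consume completion queue entries whose phase bit matches the queue's
 * current phase, invoking each ccb's done handler, then update the completion
 * queue head doorbell.  Returns non-zero if any entry was handled, or -1 if
 * the completion queue mutex was contended.
 */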
int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_ccb *ccb;
	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
	u_int32_t head;
	u_int16_t flags;
	int rv = 0;

	if (!mtx_enter_try(&q->q_cq_mtx))
		return (-1);

	head = q->q_cq_head;

	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	for (;;) {
		cqe = &ring[head];
		flags = lemtoh16(&cqe->flags);
		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
			break;

		membar_consumer();

		ccb = &sc->sc_ccbs[cqe->cid];
		sc->sc_ops->op_cq_done(sc, q, ccb);
		ccb->ccb_done(sc, ccb, cqe);

		if (++head >= q->q_entries) {
			head = 0;
			q->q_cq_phase ^= NVME_CQE_PHASE;
		}

		rv = 1;
	}
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	if (rv)
		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head = head);
	mtx_leave(&q->q_cq_mtx);

	return (rv);
}

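/*
 * Issue IDENTIFY CONTROLLER and record the results: maximum data transfer
 * size (mdts), number of namespaces (nn) and the identify structure itself.
 */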
int
nvme_identify(struct nvme_softc *sc, u_int mpsmin)
{
	char sn[41], mn[81], fr[17];
	struct nvm_identify_controller *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	int rv = 1;

	ccb = nvme_ccb_get(sc);
	if (ccb == NULL)
		panic("nvme_identify: nvme_ccb_get returned NULL");

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL)
		return (1);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = mem;

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify,
	    NVME_TIMO_IDENT);
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	nvme_ccb_put(sc, ccb);

	if (rv != 0)
		goto done;

	identify = NVME_DMA_KVA(mem);

	scsi_strvis(sn, identify->sn, sizeof(identify->sn));
	scsi_strvis(mn, identify->mn, sizeof(identify->mn));
	scsi_strvis(fr, identify->fr, sizeof(identify->fr));

	printf("%s: %s, firmware %s, serial %s\n", DEVNAME(sc), mn, fr, sn);

	if (identify->mdts > 0) {
		sc->sc_mdts = (1 << identify->mdts) * (1 << mpsmin);
		if (sc->sc_mdts > NVME_MAXPHYS)
			sc->sc_mdts = NVME_MAXPHYS;
		sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;
	}

	sc->sc_nn = lemtoh32(&identify->nn);

	/*
	 * At least one Apple NVMe device presents a second, bogus disk that is
	 * inaccessible, so cap targets at 1.
	 *
	 * sd1 at scsibus1 targ 2 lun 0: <NVMe, APPLE SSD AP0512, 16.1> [..]
	 * sd1: 0MB, 4096 bytes/sector, 2 sectors
	 */
	if (sc->sc_nn > 1 &&
	    mn[0] == 'A' && mn[1] == 'P' && mn[2] == 'P' && mn[3] == 'L' &&
	    mn[4] == 'E')
		sc->sc_nn = 1;

	memcpy(&sc->sc_identify, identify, sizeof(sc->sc_identify));

done:
	nvme_dmamem_free(sc, mem);

	return (rv);
}

int
nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_sqe_q sqe;
	struct nvme_ccb *ccb;
	int rv;

	ccb = scsi_io_get(&sc->sc_iopool, 0);
	KASSERT(ccb != NULL);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_ADD_IOCQ;
	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
	htolem16(&sqe.qsize, q->q_entries - 1);
	htolem16(&sqe.qid, q->q_id);
	sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_ADD_IOSQ;
	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
	htolem16(&sqe.qsize, q->q_entries - 1);
	htolem16(&sqe.qid, q->q_id);
	htolem16(&sqe.cqid, q->q_id);
	sqe.qflags = NVM_SQE_Q_PC;

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

fail:
	scsi_io_put(&sc->sc_iopool, ccb);
	return (rv);
}

int
nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_sqe_q sqe;
	struct nvme_ccb *ccb;
	int rv;

	ccb = scsi_io_get(&sc->sc_iopool, 0);
	KASSERT(ccb != NULL);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_DEL_IOSQ;
	htolem16(&sqe.qid, q->q_id);

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_DEL_IOCQ;
	htolem16(&sqe.qid, q->q_id);

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

	nvme_q_free(sc, q);

fail:
	scsi_io_put(&sc->sc_iopool, ccb);
	return (rv);

}

void
nvme_fill_identify(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_dmamem *mem = ccb->ccb_cookie;

	sqe->opcode = NVM_ADMIN_IDENTIFY;
	htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem));
	htolem32(&sqe->cdw10, 1);
}

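/*
 * Allocate the ccb array along with a single DMA-able region that is carved
 * into a per-ccb PRP list of sc_max_prpl entries each.
 */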
int
nvme_ccbs_alloc(struct nvme_softc *sc, u_int nccbs)
{
	struct nvme_ccb *ccb;
	bus_addr_t off;
	u_int64_t *prpl;
	u_int i;

	sc->sc_ccbs = mallocarray(nccbs, sizeof(*ccb), M_DEVBUF,
	    M_WAITOK | M_CANFAIL);
	if (sc->sc_ccbs == NULL)
		return (1);

	sc->sc_ccb_prpls = nvme_dmamem_alloc(sc,
	    sizeof(*prpl) * sc->sc_max_prpl * nccbs);

	prpl = NVME_DMA_KVA(sc->sc_ccb_prpls);
	off = 0;

	for (i = 0; i < nccbs; i++) {
		ccb = &sc->sc_ccbs[i];

		if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
		    sc->sc_max_prpl + 1, /* we get a free prp in the sqe */
		    sc->sc_mps, sc->sc_mps,
		    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
		    &ccb->ccb_dmamap) != 0)
			goto free_maps;

		ccb->ccb_id = i;
		ccb->ccb_prpl = prpl;
		ccb->ccb_prpl_off = off;
		ccb->ccb_prpl_dva = NVME_DMA_DVA(sc->sc_ccb_prpls) + off;

		SIMPLEQ_INSERT_TAIL(&sc->sc_ccb_list, ccb, ccb_entry);

		prpl += sc->sc_max_prpl;
		off += sizeof(*prpl) * sc->sc_max_prpl;
	}

	return (0);

free_maps:
	nvme_ccbs_free(sc, nccbs);
	return (1);
}

void *
nvme_ccb_get(void *cookie)
{
	struct nvme_softc *sc = cookie;
	struct nvme_ccb *ccb;

	mtx_enter(&sc->sc_ccb_mtx);
	ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list);
	if (ccb != NULL)
		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
	mtx_leave(&sc->sc_ccb_mtx);

	return (ccb);
}

void
nvme_ccb_put(void *cookie, void *io)
{
	struct nvme_softc *sc = cookie;
	struct nvme_ccb *ccb = io;

	mtx_enter(&sc->sc_ccb_mtx);
	SIMPLEQ_INSERT_HEAD(&sc->sc_ccb_list, ccb, ccb_entry);
	mtx_leave(&sc->sc_ccb_mtx);
}

void
nvme_ccbs_free(struct nvme_softc *sc, unsigned int nccbs)
{
	struct nvme_ccb *ccb;

	while ((ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
	}

	nvme_dmamem_free(sc, sc->sc_ccb_prpls);
	free(sc->sc_ccbs, M_DEVBUF, nccbs * sizeof(*ccb));
}

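/*
 * Allocate and initialise a submission/completion queue pair: DMA memory for
 * both rings, doorbell offsets derived from the queue id and doorbell stride,
 * and the initial tail/head/phase state.
 */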
struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, u_int16_t id, u_int entries, u_int dstrd)
{
	struct nvme_queue *q;

	q = malloc(sizeof(*q), M_DEVBUF, M_WAITOK | M_CANFAIL);
	if (q == NULL)
		return (NULL);

	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_sqe) * entries);
	if (q->q_sq_dmamem == NULL)
		goto free;

	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_cqe) * entries);
	if (q->q_cq_dmamem == NULL)
		goto free_sq;

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	mtx_init(&q->q_sq_mtx, IPL_BIO);
	mtx_init(&q->q_cq_mtx, IPL_BIO);
	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);

	q->q_id = id;
	q->q_entries = entries;
	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	if (sc->sc_ops->op_q_alloc != NULL) {
		if (sc->sc_ops->op_q_alloc(sc, q) != 0)
			goto free_cq;
	}

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	return (q);

free_cq:
	nvme_dmamem_free(sc, q->q_cq_dmamem);
free_sq:
	nvme_dmamem_free(sc, q->q_sq_dmamem);
free:
	free(q, M_DEVBUF, sizeof *q);

	return (NULL);
}

int
nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q)
{
	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	return (0);
}

void
nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
{
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);

	if (sc->sc_ops->op_q_alloc != NULL)
		sc->sc_ops->op_q_free(sc, q);

	nvme_dmamem_free(sc, q->q_cq_dmamem);
	nvme_dmamem_free(sc, q->q_sq_dmamem);
	free(q, M_DEVBUF, sizeof *q);
}

int
nvme_intr(void *xsc)
{
	struct nvme_softc *sc = xsc;
	int rv = 0;

	if (nvme_q_complete(sc, sc->sc_q))
		rv = 1;
	if (nvme_q_complete(sc, sc->sc_admin_q))
		rv = 1;

	return (rv);
}

int
nvme_intr_intx(void *xsc)
{
	struct nvme_softc *sc = xsc;
	int rv;

	nvme_write4(sc, NVME_INTMS, 1);
	rv = nvme_intr(sc);
	nvme_write4(sc, NVME_INTMC, 1);

	return (rv);
}

struct nvme_dmamem *
nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
{
	struct nvme_dmamem *ndm;
	int nsegs;

	ndm = malloc(sizeof(*ndm), M_DEVBUF, M_WAITOK | M_ZERO);
	if (ndm == NULL)
		return (NULL);

	ndm->ndm_size = size;

	if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
	    &ndm->ndm_map) != 0)
		goto ndmfree;

	if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg,
	    1, &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
		goto destroy;

	if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size,
	    &ndm->ndm_kva, BUS_DMA_WAITOK) != 0)
		goto free;

	if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size,
	    NULL, BUS_DMA_WAITOK) != 0)
		goto unmap;

	return (ndm);

unmap:
	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size);
free:
	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
destroy:
	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
ndmfree:
	free(ndm, M_DEVBUF, sizeof *ndm);

	return (NULL);
}

void
nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem),
	    0, NVME_DMA_LEN(mem), ops);
}

void
nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
{
	bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
	free(ndm, M_DEVBUF, sizeof *ndm);
}

#ifdef HIBERNATE

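/*
 * Submit a single admin command and busy-wait for its completion.  Used
 * during hibernate when interrupts and the normal ccb machinery are not
 * available.
 */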
int
nvme_hibernate_admin_cmd(struct nvme_softc *sc, struct nvme_sqe *sqe,
    struct nvme_cqe *cqe, int cid)
{
	struct nvme_sqe *asqe = NVME_DMA_KVA(sc->sc_admin_q->q_sq_dmamem);
	struct nvme_cqe *acqe = NVME_DMA_KVA(sc->sc_admin_q->q_cq_dmamem);
	struct nvme_queue *q = sc->sc_admin_q;
	int tail;
	u_int16_t flags;

	/* submit command */
	tail = sc->sc_ops->op_sq_enter_locked(sc, q, /* XXX ccb */ NULL);

	asqe += tail;
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
	*asqe = *sqe;
	asqe->cid = cid;
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	sc->sc_ops->op_sq_leave_locked(sc, q, /* XXX ccb */ NULL);

	/* wait for completion */
	acqe += q->q_cq_head;
	for (;;) {
		nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
		flags = lemtoh16(&acqe->flags);
		if ((flags & NVME_CQE_PHASE) == q->q_cq_phase)
			break;

		delay(10);
	}

	if (++q->q_cq_head >= q->q_entries) {
		q->q_cq_head = 0;
		q->q_cq_phase ^= NVME_CQE_PHASE;
	}
	nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);
	if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) || (acqe->cid != cid))
		return (EIO);

	return (0);
}

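/*
 * Hibernate I/O entry point.  HIB_INIT locates the nvme softc and namespace
 * behind the sd device and creates the reserved hibernate I/O queue with
 * polled admin commands; HIB_W writes a chunk of the image by building an
 * SQE (with a PRP list in the scratch page for transfers larger than two
 * pages) and polling the completion queue for the result.
 */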
int
nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size,
    int op, void *page)
{
	struct nvme_hibernate_page {
		u_int64_t		prpl[MAXPHYS / PAGE_SIZE];

		struct nvme_softc	*sc;
		int			nsid;
		int			sq_tail;
		int			cq_head;
		int			cqe_phase;

		daddr_t			poffset;
		size_t			psize;
		u_int32_t		secsize;
	} *my = page;
	struct nvme_sqe_io *isqe;
	struct nvme_cqe *icqe;
	paddr_t data_phys, page_phys;
	u_int64_t data_bus_phys, page_bus_phys;
	u_int16_t flags;
	int i;
	int error;

	if (op == HIB_INIT) {
		struct device *disk;
		struct device *scsibus;
		struct nvm_identify_namespace *ns;
		struct nvm_namespace_format *f;
		extern struct cfdriver sd_cd;
		struct scsi_link *link;
		struct scsibus_softc *bus_sc;
		struct nvme_sqe_q qsqe;
		struct nvme_cqe qcqe;

		/* find nvme softc */
		disk = disk_lookup(&sd_cd, DISKUNIT(dev));
		scsibus = disk->dv_parent;
		my->sc = (struct nvme_softc *)disk->dv_parent->dv_parent;

		/* find scsi_link, which tells us the target */
		my->nsid = 0;
		bus_sc = (struct scsibus_softc *)scsibus;
		SLIST_FOREACH(link, &bus_sc->sc_link_list, bus_list) {
			if (link->device_softc == disk) {
				my->nsid = link->target;
				break;
			}
		}
		if (my->nsid == 0)
			return (EIO);
		ns = my->sc->sc_namespaces[my->nsid].ident;
		f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

		my->poffset = blkno;
		my->psize = size;
		my->secsize = 1 << f->lbads;

		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem), 0,
		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_cqe));
		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem), 0,
		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_sqe));

		my->sq_tail = 0;
		my->cq_head = 0;
		my->cqe_phase = NVME_CQE_PHASE;

		memset(&qsqe, 0, sizeof(qsqe));
		qsqe.opcode = NVM_ADMIN_ADD_IOCQ;
		htolem64(&qsqe.prp1,
		    NVME_DMA_DVA(my->sc->sc_hib_q->q_cq_dmamem));
		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
		qsqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
		    &qcqe, 1) != 0)
			return (EIO);

		memset(&qsqe, 0, sizeof(qsqe));
		qsqe.opcode = NVM_ADMIN_ADD_IOSQ;
		htolem64(&qsqe.prp1,
		    NVME_DMA_DVA(my->sc->sc_hib_q->q_sq_dmamem));
		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
		htolem16(&qsqe.cqid, my->sc->sc_hib_q->q_id);
		qsqe.qflags = NVM_SQE_Q_PC;
		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
		    &qcqe, 2) != 0)
			return (EIO);

		return (0);
	}

	if (op != HIB_W)
		return (0);

	if (blkno + (size / DEV_BSIZE) > my->psize)
		return E2BIG;

	isqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem);
	isqe += my->sq_tail;
	if (++my->sq_tail == my->sc->sc_hib_q->q_entries)
		my->sq_tail = 0;

	memset(isqe, 0, sizeof(*isqe));
	isqe->opcode = NVM_CMD_WRITE;
	htolem32(&isqe->nsid, my->nsid);

	pmap_extract(pmap_kernel(), addr, &data_phys);
	data_bus_phys = data_phys;
	htolem64(&isqe->entry.prp[0], data_bus_phys);
	if ((size > my->sc->sc_mps) && (size <= my->sc->sc_mps * 2)) {
		htolem64(&isqe->entry.prp[1], data_bus_phys + my->sc->sc_mps);
	} else if (size > my->sc->sc_mps * 2) {
		pmap_extract(pmap_kernel(), (vaddr_t)page, &page_phys);
		page_bus_phys = page_phys;
		htolem64(&isqe->entry.prp[1], page_bus_phys +
		    offsetof(struct nvme_hibernate_page, prpl));
		for (i = 1; i < (size / my->sc->sc_mps); i++) {
			htolem64(&my->prpl[i - 1], data_bus_phys +
			    (i * my->sc->sc_mps));
		}
	}

	isqe->slba = (blkno + my->poffset) / (my->secsize / DEV_BSIZE);
	isqe->nlb = (size / my->secsize) - 1;
	isqe->cid = blkno % 0xffff;

	nvme_write4(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd),
	    my->sq_tail);
	nvme_barrier(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd), 4,
	    BUS_SPACE_BARRIER_WRITE);

	error = 0;

	icqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem);
	icqe += my->cq_head;

	nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
	    BUS_DMASYNC_POSTREAD);
	for (;;) {
		flags = lemtoh16(&icqe->flags);
		if ((flags & NVME_CQE_PHASE) == my->cqe_phase) {
			if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) ||
			    (icqe->cid != blkno % 0xffff))
				error = EIO;

			break;
		}

		delay(1);
		nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_POSTREAD);
	}
	nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
	    BUS_DMASYNC_PREREAD);

	if (++my->cq_head == my->sc->sc_hib_q->q_entries) {
		my->cq_head = 0;
		my->cqe_phase ^= NVME_CQE_PHASE;
	}

	nvme_write4(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd),
	    my->cq_head);
	nvme_barrier(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd), 4,
	    BUS_SPACE_BARRIER_WRITE);

	return (error);
}

#endif

#if NBIO > 0
int
nvme_bioctl(struct device *self, u_long cmd, caddr_t data)
{
	struct nvme_softc	*sc = (struct nvme_softc *)self;
	struct nvme_pt_cmd	*pt;
	int			 error = 0;

	rw_enter_write(&sc->sc_lock);

	switch (cmd) {
	case BIOCINQ:
		error = nvme_bioctl_inq(sc, (struct bioc_inq *)data);
		break;
	case BIOCVOL:
		error = nvme_bioctl_vol(sc, (struct bioc_vol *)data);
		break;
	case BIOCDISK:
		error = nvme_bioctl_disk(sc, (struct bioc_disk *)data);
		break;
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_cmd *)data;
		error = nvme_passthrough_cmd(sc, pt, sc->sc_dev.dv_unit, -1);
		break;
	default:
		printf("nvme_bioctl() Unknown command (%lu)\n", cmd);
		error = ENOTTY;
	}

	rw_exit_write(&sc->sc_lock);

	return error;
}

void
nvme_bio_status(struct bio_status *bs, const char *fmt, ...)
{
	va_list			ap;

	va_start(ap, fmt);
	bio_status(bs, 0, BIO_MSG_INFO, fmt, &ap);
	va_end(ap);
}

const char *
nvme_bioctl_sdname(const struct nvme_softc *sc, int target)
{
	const struct scsi_link		*link;
	const struct sd_softc		*sd;

	link = scsi_get_link(sc->sc_scsibus, target, 0);
	if (link == NULL)
		return NULL;
	sd = (struct sd_softc *)(link->device_softc);
	if (ISSET(link->state, SDEV_S_DYING) || sd == NULL ||
	    ISSET(sd->flags, SDF_DYING))
		return NULL;

	if (nvme_read4(sc, NVME_VS) == 0xffffffff)
		return NULL;

	return DEVNAME(sd);
}

int
nvme_bioctl_inq(struct nvme_softc *sc, struct bioc_inq *bi)
{
	char				 sn[41], mn[81], fr[17];
	struct nvm_identify_controller	*idctrl = &sc->sc_identify;
	struct bio_status		*bs;
	unsigned int			 nn;
	uint32_t			 cc, csts, vs;

	/* Don't tell bioctl about namespaces > last configured namespace. */
	for (nn = sc->sc_nn; nn > 0; nn--) {
		if (sc->sc_namespaces[nn].ident)
			break;
	}
	bi->bi_novol = bi->bi_nodisk = nn;
	strlcpy(bi->bi_dev, DEVNAME(sc), sizeof(bi->bi_dev));

	bs = &bi->bi_bio.bio_status;
	bio_status_init(bs, &sc->sc_dev);
	bs->bs_status = BIO_STATUS_SUCCESS;

	scsi_strvis(sn, idctrl->sn, sizeof(idctrl->sn));
	scsi_strvis(mn, idctrl->mn, sizeof(idctrl->mn));
	scsi_strvis(fr, idctrl->fr, sizeof(idctrl->fr));

	nvme_bio_status(bs, "%s, %s, %s", mn, fr, sn);
	nvme_bio_status(bs, "Max i/o %zu bytes%s%s%s, Sanitize 0x%b",
	    sc->sc_mdts,
	    ISSET(idctrl->lpa, NVM_ID_CTRL_LPA_PE) ?
	    ", Persistent Event Log" : "",
	    ISSET(idctrl->fna, NVM_ID_CTRL_FNA_CRYPTOFORMAT) ?
	    ", CryptoFormat" : "",
	    ISSET(idctrl->vwc, NVM_ID_CTRL_VWC_PRESENT) ?
	    ", Volatile Write Cache" : "",
	    lemtoh32(&idctrl->sanicap), NVM_ID_CTRL_SANICAP_FMT
	);

	if (idctrl->ctratt != 0)
		nvme_bio_status(bs, "Features 0x%b", lemtoh32(&idctrl->ctratt),
		    NVM_ID_CTRL_CTRATT_FMT);

	if (idctrl->oacs || idctrl->oncs) {
		nvme_bio_status(bs, "Admin commands 0x%b, NVM commands 0x%b",
		    lemtoh16(&idctrl->oacs), NVM_ID_CTRL_OACS_FMT,
		    lemtoh16(&idctrl->oncs), NVM_ID_CTRL_ONCS_FMT);
	}

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);
	vs = nvme_read4(sc, NVME_VS);

	if (vs == 0xffffffff) {
		nvme_bio_status(bs, "Invalid PCIe register mapping");
		return 0;
	}

	nvme_bio_status(bs, "NVMe %u.%u%s%s%sabled, %sReady%s%s%s%s",
	    NVME_VS_MJR(vs), NVME_VS_MNR(vs),
	    (NVME_CC_CSS_R(cc) == NVME_CC_CSS_NVM) ? ", NVM I/O command set" : "",
	    (NVME_CC_CSS_R(cc) == 0x7) ? ", Admin command set only" : "",
	    ISSET(cc, NVME_CC_EN) ? ", En" : "Dis",
	    ISSET(csts, NVME_CSTS_RDY) ? "" : "Not ",
	    ISSET(csts, NVME_CSTS_CFS) ? ", Fatal Error, " : "",
	    (NVME_CC_SHN_R(cc) == NVME_CC_SHN_NORMAL) ? ", Normal shutdown" : "",
	    (NVME_CC_SHN_R(cc) == NVME_CC_SHN_ABRUPT) ? ", Abrupt shutdown" : "",
	    ISSET(csts, NVME_CSTS_SHST_DONE) ? " complete" : "");

	return 0;
}

int
nvme_bioctl_vol(struct nvme_softc *sc, struct bioc_vol *bv)
{
	const struct nvm_identify_namespace	*idns;
	const char				*sd;
	int					 target;
	unsigned int				 lbaf;

	target = bv->bv_volid + 1;
	if (target > sc->sc_nn) {
		bv->bv_status = BIOC_SVINVALID;
		return 0;
	}

	bv->bv_level = 'c';
	bv->bv_nodisk = 1;

	idns = sc->sc_namespaces[target].ident;
	if (idns == NULL) {
		bv->bv_status = BIOC_SVINVALID;
		return 0;
	}

	lbaf = NVME_ID_NS_FLBAS(idns->flbas);
	if (idns->nlbaf > 16)
		lbaf |= (idns->flbas >> 1) & 0x3f;
	bv->bv_size = nvme_scsi_size(idns) << idns->lbaf[lbaf].lbads;

	sd = nvme_bioctl_sdname(sc, target);
	if (sd) {
		strlcpy(bv->bv_dev, sd, sizeof(bv->bv_dev));
		bv->bv_status = BIOC_SVONLINE;
	} else
		bv->bv_status = BIOC_SVOFFLINE;

	return 0;
}

int
nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_disk *bd)
{
	const char 			*rpdesc[4] = {
		" (Best)",
		" (Better)",
		" (Good)",
		" (Degraded)"
	};
	const char			*protection[4] = {
		"not enabled",
		"Type 1",
		"Type 2",
		"Type 3",
	};
	char				 buf[32], msg[BIO_MSG_LEN];
	struct nvm_identify_namespace	*idns;
	struct bio_status		*bs;
	uint64_t			 id1, id2;
	unsigned int			 i, lbaf, target;
	uint16_t			 ms;
	uint8_t				 dps;

	target = bd->bd_volid + 1;
	if (target > sc->sc_nn)
		return EINVAL;
	bd->bd_channel = sc->sc_scsibus->sc_dev.dv_unit;
	bd->bd_target = target;
	bd->bd_lun = 0;
	snprintf(bd->bd_procdev, sizeof(bd->bd_procdev), "Namespace %u", target);

	bs = &bd->bd_bio.bio_status;
	bs->bs_status = BIO_STATUS_SUCCESS;
	snprintf(bs->bs_controller, sizeof(bs->bs_controller), "%11u",
	    bd->bd_diskid);

	idns = sc->sc_namespaces[target].ident;
	if (idns == NULL) {
		bd->bd_status = BIOC_SDUNUSED;
		return 0;
	}

	lbaf = NVME_ID_NS_FLBAS(idns->flbas);
	if (idns->nlbaf > nitems(idns->lbaf))
		lbaf |= (idns->flbas >> 1) & 0x3f;
	bd->bd_size = lemtoh64(&idns->nsze) << idns->lbaf[lbaf].lbads;

	if (memcmp(idns->nguid, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) {
		memcpy(&id1, idns->nguid, sizeof(uint64_t));
		memcpy(&id2, idns->nguid + sizeof(uint64_t), sizeof(uint64_t));
		snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx%08llx",
		    id1, id2);
	} else if (memcmp(idns->eui64, "\0\0\0\0\0\0\0\0", 8)) {
		memcpy(&id1, idns->eui64, sizeof(uint64_t));
		snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx", id1);
	}

	msg[0] = '\0';
	for (i = 0; i <= idns->nlbaf; i++) {
		if (idns->lbaf[i].lbads == 0)
			continue;
		snprintf(buf, sizeof(buf), "%s%s%u",
		    strlen(msg) ? ", " : "", (i == lbaf) ? "*" : "",
		    1 << idns->lbaf[i].lbads);
		strlcat(msg, buf, sizeof(msg));
		ms = lemtoh16(&idns->lbaf[i].ms);
		if (ms) {
			snprintf(buf, sizeof(buf), "+%u", ms);
			strlcat(msg, buf, sizeof(msg));
		}
		strlcat(msg, rpdesc[idns->lbaf[i].rp], sizeof(msg));
	}
	nvme_bio_status(bs, "Formats %s", msg);

	if (idns->nsfeat)
		nvme_bio_status(bs, "Features 0x%b", idns->nsfeat,
		    NVME_ID_NS_NSFEAT_FMT);

	if (idns->dps) {
		dps = idns->dps;
		snprintf(msg, sizeof(msg), "Data Protection (0x%02x) "
		    "Protection Data in ", dps);
		if (ISSET(dps, NVME_ID_NS_DPS_PIP))
			strlcat(msg, "first", sizeof(msg));
		else
			strlcat(msg, "last", sizeof(msg));
		strlcat(msg, " bytes of metadata, Protection ", sizeof(msg));
		if (NVME_ID_NS_DPS_TYPE(dps) >= nitems(protection))
			strlcat(msg, "Type unknown", sizeof(msg));
		else
			strlcat(msg, protection[NVME_ID_NS_DPS_TYPE(dps)],
			    sizeof(msg));
		nvme_bio_status(bs, "%s", msg);
	}

	if (nvme_bioctl_sdname(sc, target) == NULL)
		bd->bd_status = BIOC_SDOFFLINE;
	else
		bd->bd_status = BIOC_SDONLINE;

	return 0;
}
#endif	/* NBIO > 0 */