1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
5 * Written by: John Baldwin <jhb@FreeBSD.org>
6 */
7
8#include <sys/sysctl.h>
9#include <err.h>
10#include <errno.h>
11#include <libnvmf.h>
12#include <pthread.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <unistd.h>
17
18#include "internal.h"
19
/*
 * State for the single active I/O controller.  The queue bookkeeping
 * fields are protected by io_na_mutex.
 */
struct io_controller {
	struct controller *c;	/* Generic controller state (init_controller). */

	u_int num_io_queues;	/* Count granted via SET_FEATURES (0 = not yet set). */
	u_int active_io_queues;	/* I/O queue threads currently running. */
	struct nvmf_qpair **io_qpairs;	/* Slot per queue; NULL when free. */
	int *io_sockets;	/* Socket per queue; -1 once closed. */

	/* Backing data for the FIRMWARE_SLOT and HEALTH_INFORMATION log pages. */
	struct nvme_firmware_page fp;
	struct nvme_health_information_page hip;
	uint16_t partial_dur;	/* Sub-unit carry for Data Units Read. */
	uint16_t partial_duw;	/* Sub-unit carry for Data Units Written. */

	/* Identity captured from the admin CONNECT. */
	uint16_t cntlid;
	char hostid[16];
	char hostnqn[NVME_NQN_FIELD_SIZE];
};
37
/* Association shared by every I/O controller connection. */
static struct nvmf_association *io_na;
/* Signaled when the last active I/O queue thread exits. */
static pthread_cond_t io_cond;
/* Protects io_controller and its queue arrays/counters. */
static pthread_mutex_t io_na_mutex;
/* The single active I/O controller, or NULL when none is connected. */
static struct io_controller *io_controller;
/* Subsystem NQN supplied to init_io(). */
static const char *nqn;
/* Serial number derived from the kern.hostid sysctl. */
static char serial[NVME_SERIAL_NUMBER_LENGTH];
44
45void
46init_io(const char *subnqn)
47{
48	struct nvmf_association_params aparams;
49	u_long hostid;
50	size_t len;
51
52	memset(&aparams, 0, sizeof(aparams));
53	aparams.sq_flow_control = !flow_control_disable;
54	aparams.dynamic_controller_model = true;
55	aparams.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
56	aparams.max_io_qsize = NVMF_MAX_IO_ENTRIES;
57	aparams.tcp.pda = 0;
58	aparams.tcp.header_digests = header_digests;
59	aparams.tcp.data_digests = data_digests;
60	aparams.tcp.maxr2t = 1;
61	aparams.tcp.maxh2cdata = 256 * 1024;
62	io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true,
63	    &aparams);
64	if (io_na == NULL)
65		err(1, "Failed to create I/O controller association");
66
67	nqn = subnqn;
68
69	/* Generate a serial number from the kern.hostid node. */
70	len = sizeof(hostid);
71	if (sysctlbyname("kern.hostid", &hostid, &len, NULL, 0) == -1)
72		err(1, "sysctl: kern.hostid");
73
74	nvmf_controller_serial(serial, sizeof(serial), hostid);
75
76	pthread_cond_init(&io_cond, NULL);
77	pthread_mutex_init(&io_na_mutex, NULL);
78
79	if (kernel_io)
80		init_ctl_port(subnqn, &aparams);
81}
82
83void
84shutdown_io(void)
85{
86	if (kernel_io)
87		shutdown_ctl_port(nqn);
88}
89
90static void
91handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc,
92    const struct nvme_command *cmd)
93{
94	uint64_t offset;
95	uint32_t numd;
96	size_t len;
97	uint8_t lid;
98
99	lid = le32toh(cmd->cdw10) & 0xff;
100	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
101	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;
102
103	if (offset % 3 != 0)
104		goto error;
105
106	len = (numd + 1) * 4;
107
108	switch (lid) {
109	case NVME_LOG_ERROR:
110	{
111		void *buf;
112
113		if (len % sizeof(struct nvme_error_information_entry) != 0)
114			goto error;
115
116		buf = calloc(1, len);
117		nvmf_send_controller_data(nc, buf, len);
118		free(buf);
119		return;
120	}
121	case NVME_LOG_HEALTH_INFORMATION:
122		if (len != sizeof(ioc->hip))
123			goto error;
124
125		nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip));
126		return;
127	case NVME_LOG_FIRMWARE_SLOT:
128		if (len != sizeof(ioc->fp))
129			goto error;
130
131		nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp));
132		return;
133	default:
134		warnx("Unsupported page %#x for GET_LOG_PAGE\n", lid);
135		goto error;
136	}
137
138error:
139	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
140}
141
142static bool
143handle_io_identify_command(const struct nvmf_capsule *nc,
144    const struct nvme_command *cmd)
145{
146	struct nvme_namespace_data nsdata;
147	struct nvme_ns_list nslist;
148	uint32_t nsid;
149	uint8_t cns;
150
151	cns = le32toh(cmd->cdw10) & 0xFF;
152	switch (cns) {
153	case 0:	/* Namespace data. */
154		if (!device_namespace_data(le32toh(cmd->nsid), &nsdata)) {
155			nvmf_send_generic_error(nc,
156			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
157			return (true);
158		}
159
160		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
161		return (true);
162	case 2:	/* Active namespace list. */
163		nsid = le32toh(cmd->nsid);
164		if (nsid >= 0xfffffffe) {
165			nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
166			return (true);
167		}
168
169		device_active_nslist(nsid, &nslist);
170		nvmf_send_controller_data(nc, &nslist, sizeof(nslist));
171		return (true);
172	case 3:	/* Namespace Identification Descriptor list. */
173		if (!device_identification_descriptor(le32toh(cmd->nsid),
174		    &nsdata)) {
175			nvmf_send_generic_error(nc,
176			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
177			return (true);
178		}
179
180		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
181		return (true);
182	default:
183		return (false);
184	}
185}
186
187static void
188handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc,
189    const struct nvme_command *cmd)
190{
191	struct nvme_completion cqe;
192	uint8_t fid;
193
194	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
195	switch (fid) {
196	case NVME_FEAT_NUMBER_OF_QUEUES:
197	{
198		uint32_t num_queues;
199
200		if (ioc->num_io_queues != 0) {
201			nvmf_send_generic_error(nc,
202			    NVME_SC_COMMAND_SEQUENCE_ERROR);
203			return;
204		}
205
206		num_queues = le32toh(cmd->cdw11) & 0xffff;
207
208		/* 5.12.1.7: 65535 is invalid. */
209		if (num_queues == 65535)
210			goto error;
211
212		/* Fabrics requires the same number of SQs and CQs. */
213		if (le32toh(cmd->cdw11) >> 16 != num_queues)
214			goto error;
215
216		/* Convert to 1's based */
217		num_queues++;
218
219		/* Lock to synchronize with handle_io_qpair. */
220		pthread_mutex_lock(&io_na_mutex);
221		ioc->num_io_queues = num_queues;
222		ioc->io_qpairs = calloc(num_queues, sizeof(*ioc->io_qpairs));
223		ioc->io_sockets = calloc(num_queues, sizeof(*ioc->io_sockets));
224		pthread_mutex_unlock(&io_na_mutex);
225
226		nvmf_init_cqe(&cqe, nc, 0);
227		cqe.cdw0 = cmd->cdw11;
228		nvmf_send_response(nc, &cqe);
229		return;
230	}
231	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
232	{
233		uint32_t aer_mask;
234
235		aer_mask = le32toh(cmd->cdw11);
236
237		/* Check for any reserved or unimplemented feature bits. */
238		if ((aer_mask & 0xffffc000) != 0)
239			goto error;
240
241		/* No AERs are generated by this daemon. */
242		nvmf_send_success(nc);
243		return;
244	}
245	default:
246		warnx("Unsupported feature ID %u for SET_FEATURES", fid);
247		goto error;
248	}
249
250error:
251	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
252}
253
254static bool
255admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
256    void *arg)
257{
258	struct io_controller *ioc = arg;
259
260	switch (cmd->opc) {
261	case NVME_OPC_GET_LOG_PAGE:
262		handle_get_log_page(ioc, nc, cmd);
263		return (true);
264	case NVME_OPC_IDENTIFY:
265		return (handle_io_identify_command(nc, cmd));
266	case NVME_OPC_SET_FEATURES:
267		handle_set_features(ioc, nc, cmd);
268		return (true);
269	case NVME_OPC_ASYNC_EVENT_REQUEST:
270		/* Ignore and never complete. */
271		return (true);
272	case NVME_OPC_KEEP_ALIVE:
273		nvmf_send_success(nc);
274		return (true);
275	default:
276		return (false);
277	}
278}
279
/*
 * Service the admin queue until the connection goes away, then tear
 * the controller down: force the I/O queue threads out by closing
 * their sockets, wait for them to drain, and free the controller.
 */
static void
handle_admin_qpair(struct io_controller *ioc)
{
	pthread_setname_np(pthread_self(), "admin queue");

	/* Blocks until the admin connection is torn down. */
	controller_handle_admin_commands(ioc->c, admin_command, ioc);

	pthread_mutex_lock(&io_na_mutex);
	/* Closing a socket makes that I/O thread's next receive fail. */
	for (u_int i = 0; i < ioc->num_io_queues; i++) {
		if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1)
			continue;
		close(ioc->io_sockets[i]);
		ioc->io_sockets[i] = -1;
	}

	/* Wait for I/O threads to notice. */
	while (ioc->active_io_queues > 0)
		pthread_cond_wait(&io_cond, &io_na_mutex);

	/* Allow a new admin CONNECT to claim the controller slot. */
	io_controller = NULL;
	pthread_mutex_unlock(&io_na_mutex);

	free_controller(ioc->c);

	free(ioc);
}
306
307static bool
308handle_io_fabrics_command(const struct nvmf_capsule *nc,
309    const struct nvmf_fabric_cmd *fc)
310{
311	switch (fc->fctype) {
312	case NVMF_FABRIC_COMMAND_CONNECT:
313		warnx("CONNECT command on connected queue");
314		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
315		break;
316	case NVMF_FABRIC_COMMAND_DISCONNECT:
317	{
318		const struct nvmf_fabric_disconnect_cmd *dis =
319		    (const struct nvmf_fabric_disconnect_cmd *)fc;
320		if (dis->recfmt != htole16(0)) {
321			nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
322			    NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
323			break;
324		}
325		nvmf_send_success(nc);
326		return (true);
327	}
328	default:
329		warnx("Unsupported fabrics command %#x", fc->fctype);
330		nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
331		break;
332	}
333
334	return (false);
335}
336
/*
 * Add to a 128-bit little-endian counter stored as two uint64_t words
 * (used for Health Information Page statistics).  pair[0] is the low
 * word, pair[1] the high word; a carry propagates on overflow.
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		/* Decode before incrementing so the carry is correct on
		 * big-endian hosts too. */
		pair[1] = htole64(le64toh(pair[1]) + 1);
}
348
349static uint64_t
350cmd_lba(const struct nvme_command *cmd)
351{
352	return ((uint64_t)le32toh(cmd->cdw11) << 32 | le32toh(cmd->cdw10));
353}
354
355static u_int
356cmd_nlb(const struct nvme_command *cmd)
357{
358	return ((le32toh(cmd->cdw12) & 0xffff) + 1);
359}
360
361static void
362handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc,
363    const struct nvme_command *cmd)
364{
365	size_t len;
366
367	len = nvmf_capsule_data_len(nc);
368	device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
369	hip_add(ioc->hip.host_read_commands, 1);
370
371	len /= 512;
372	len += ioc->partial_dur;
373	if (len > 1000)
374		hip_add(ioc->hip.data_units_read, len / 1000);
375	ioc->partial_dur = len % 1000;
376}
377
378static void
379handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc,
380    const struct nvme_command *cmd)
381{
382	size_t len;
383
384	len = nvmf_capsule_data_len(nc);
385	device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
386	hip_add(ioc->hip.host_write_commands, 1);
387
388	len /= 512;
389	len += ioc->partial_duw;
390	if (len > 1000)
391		hip_add(ioc->hip.data_units_written, len / 1000);
392	ioc->partial_duw = len % 1000;
393}
394
/* Forward a FLUSH for a single namespace to the backing device. */
static void
handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	device_flush(le32toh(cmd->nsid), nc);
}
400
401static bool
402handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp)
403{
404	const struct nvme_command *cmd;
405	struct nvmf_capsule *nc;
406	int error;
407	bool disconnect;
408
409	disconnect = false;
410
411	while (!disconnect) {
412		error = nvmf_controller_receive_capsule(qp, &nc);
413		if (error != 0) {
414			if (error != ECONNRESET)
415				warnc(error, "Failed to read command capsule");
416			break;
417		}
418
419		cmd = nvmf_capsule_sqe(nc);
420
421		switch (cmd->opc) {
422		case NVME_OPC_FLUSH:
423			if (cmd->nsid == htole32(0xffffffff)) {
424				nvmf_send_generic_error(nc,
425				    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
426				break;
427			}
428			handle_flush(nc, cmd);
429			break;
430		case NVME_OPC_WRITE:
431			handle_write(ioc, nc, cmd);
432			break;
433		case NVME_OPC_READ:
434			handle_read(ioc, nc, cmd);
435			break;
436		case NVME_OPC_FABRICS_COMMANDS:
437			disconnect = handle_io_fabrics_command(nc,
438			    (const struct nvmf_fabric_cmd *)cmd);
439			break;
440		default:
441			warnx("Unsupported NVM opcode %#x", cmd->opc);
442			nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
443			break;
444		}
445		nvmf_free_capsule(nc);
446	}
447
448	return (disconnect);
449}
450
/*
 * Run one I/O queue to completion, then deregister it and wake the
 * admin thread if this was the last active queue.
 */
static void
handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid)
{
	char name[64];
	bool disconnect;

	snprintf(name, sizeof(name), "I/O queue %d", qid);
	pthread_setname_np(pthread_self(), name);

	disconnect = handle_io_commands(ioc, qp);

	pthread_mutex_lock(&io_na_mutex);
	/*
	 * The slot is freed only on an orderly DISCONNECT; after an
	 * abrupt drop the qpair stays registered, which blocks reuse of
	 * this qid (see connect_io_qpair) — presumably until the admin
	 * teardown runs; confirm against handle_admin_qpair.
	 */
	if (disconnect)
		ioc->io_qpairs[qid - 1] = NULL;
	/* The admin thread may already have closed this socket (-1). */
	if (ioc->io_sockets[qid - 1] != -1) {
		close(ioc->io_sockets[qid - 1]);
		ioc->io_sockets[qid - 1] = -1;
	}
	ioc->active_io_queues--;
	if (ioc->active_io_queues == 0)
		pthread_cond_broadcast(&io_cond);
	pthread_mutex_unlock(&io_na_mutex);
}
474
475static void
476connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
477    const struct nvmf_fabric_connect_data *data)
478{
479	struct nvme_controller_data cdata;
480	struct io_controller *ioc;
481	int error;
482
483	/* Can only have one active I/O controller at a time. */
484	pthread_mutex_lock(&io_na_mutex);
485	if (io_controller != NULL) {
486		pthread_mutex_unlock(&io_na_mutex);
487		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
488		    NVMF_FABRIC_SC_CONTROLLER_BUSY);
489		goto error;
490	}
491
492	error = nvmf_finish_accept(nc, 2);
493	if (error != 0) {
494		pthread_mutex_unlock(&io_na_mutex);
495		warnc(error, "Failed to send CONNECT response");
496		goto error;
497	}
498
499	ioc = calloc(1, sizeof(*ioc));
500	ioc->cntlid = 2;
501	memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid));
502	memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn));
503
504	nvmf_init_io_controller_data(qp, serial, nqn, device_count(),
505	    NVMF_IOCCSZ, &cdata);
506
507	ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
508	memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr));
509
510	ioc->hip.power_cycles[0] = 1;
511
512	ioc->c = init_controller(qp, &cdata);
513
514	io_controller = ioc;
515	pthread_mutex_unlock(&io_na_mutex);
516
517	nvmf_free_capsule(nc);
518
519	handle_admin_qpair(ioc);
520	close(s);
521	return;
522
523error:
524	nvmf_free_capsule(nc);
525	close(s);
526}
527
528static void
529connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
530    const struct nvmf_fabric_connect_data *data, uint16_t qid)
531{
532	struct io_controller *ioc;
533	int error;
534
535	pthread_mutex_lock(&io_na_mutex);
536	if (io_controller == NULL) {
537		pthread_mutex_unlock(&io_na_mutex);
538		warnx("Attempt to create I/O qpair without admin qpair");
539		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
540		goto error;
541	}
542
543	if (memcmp(io_controller->hostid, data->hostid,
544	    sizeof(data->hostid)) != 0) {
545		pthread_mutex_unlock(&io_na_mutex);
546		warnx("hostid mismatch for I/O qpair CONNECT");
547		nvmf_connect_invalid_parameters(nc, true,
548		    offsetof(struct nvmf_fabric_connect_data, hostid));
549		goto error;
550	}
551	if (le16toh(data->cntlid) != io_controller->cntlid) {
552		pthread_mutex_unlock(&io_na_mutex);
553		warnx("cntlid mismatch for I/O qpair CONNECT");
554		nvmf_connect_invalid_parameters(nc, true,
555		    offsetof(struct nvmf_fabric_connect_data, cntlid));
556		goto error;
557	}
558	if (memcmp(io_controller->hostnqn, data->hostnqn,
559	    sizeof(data->hostid)) != 0) {
560		pthread_mutex_unlock(&io_na_mutex);
561		warnx("host NQN mismatch for I/O qpair CONNECT");
562		nvmf_connect_invalid_parameters(nc, true,
563		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
564		goto error;
565	}
566
567	if (io_controller->num_io_queues == 0) {
568		pthread_mutex_unlock(&io_na_mutex);
569		warnx("Attempt to create I/O qpair without enabled queues");
570		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
571		goto error;
572	}
573	if (qid > io_controller->num_io_queues) {
574		pthread_mutex_unlock(&io_na_mutex);
575		warnx("Attempt to create invalid I/O qpair %u", qid);
576		nvmf_connect_invalid_parameters(nc, false,
577		    offsetof(struct nvmf_fabric_connect_cmd, qid));
578		goto error;
579	}
580	if (io_controller->io_qpairs[qid - 1] != NULL) {
581		pthread_mutex_unlock(&io_na_mutex);
582		warnx("Attempt to re-create I/O qpair %u", qid);
583		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
584		goto error;
585	}
586
587	error = nvmf_finish_accept(nc, io_controller->cntlid);
588	if (error != 0) {
589		pthread_mutex_unlock(&io_na_mutex);
590		warnc(error, "Failed to send CONNECT response");
591		goto error;
592	}
593
594	ioc = io_controller;
595	ioc->active_io_queues++;
596	ioc->io_qpairs[qid - 1] = qp;
597	ioc->io_sockets[qid - 1] = s;
598	pthread_mutex_unlock(&io_na_mutex);
599
600	nvmf_free_capsule(nc);
601
602	handle_io_qpair(ioc, qp, qid);
603	return;
604
605error:
606	nvmf_free_capsule(nc);
607	close(s);
608}
609
610static void *
611io_socket_thread(void *arg)
612{
613	struct nvmf_fabric_connect_data data;
614	struct nvmf_qpair_params qparams;
615	const struct nvmf_fabric_connect_cmd *cmd;
616	struct nvmf_capsule *nc;
617	struct nvmf_qpair *qp;
618	int s;
619
620	pthread_detach(pthread_self());
621
622	s = (intptr_t)arg;
623	memset(&qparams, 0, sizeof(qparams));
624	qparams.tcp.fd = s;
625
626	nc = NULL;
627	qp = nvmf_accept(io_na, &qparams, &nc, &data);
628	if (qp == NULL) {
629		warnx("Failed to create I/O qpair: %s",
630		    nvmf_association_error(io_na));
631		goto error;
632	}
633
634	if (kernel_io) {
635		ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data);
636		goto error;
637	}
638
639	if (strcmp(data.subnqn, nqn) != 0) {
640		warn("I/O qpair with invalid SubNQN: %.*s",
641		    (int)sizeof(data.subnqn), data.subnqn);
642		nvmf_connect_invalid_parameters(nc, true,
643		    offsetof(struct nvmf_fabric_connect_data, subnqn));
644		goto error;
645	}
646
647	/* Is this an admin or I/O queue pair? */
648	cmd = nvmf_capsule_sqe(nc);
649	if (cmd->qid == 0)
650		connect_admin_qpair(s, qp, nc, &data);
651	else
652		connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid));
653	nvmf_free_qpair(qp);
654	return (NULL);
655
656error:
657	if (nc != NULL)
658		nvmf_free_capsule(nc);
659	if (qp != NULL)
660		nvmf_free_qpair(qp);
661	close(s);
662	return (NULL);
663}
664
665void
666handle_io_socket(int s)
667{
668	pthread_t thr;
669	int error;
670
671	error = pthread_create(&thr, NULL, io_socket_thread,
672	    (void *)(uintptr_t)s);
673	if (error != 0) {
674		warnc(error, "Failed to create I/O qpair thread");
675		close(s);
676	}
677}
678