ntb_transport.c revision 304380
/*-
 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a transport for sending and receiving messages by
 * writing to the remote memory window(s) provided by the underlying NTB
 * device.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb_transport.c 304380 2016-08-18 10:39:00Z mav $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/bus.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>

#include "ntb.h"
#include "ntb_transport.h"

#define QP_SETSIZE	64
BITSET_DEFINE(_qpset, QP_SETSIZE);
#define test_bit(pos, addr)	BIT_ISSET(QP_SETSIZE, (pos), (addr))
#define set_bit(pos, addr)	BIT_SET(QP_SETSIZE, (pos), (addr))
#define clear_bit(pos, addr)	BIT_CLR(QP_SETSIZE, (pos), (addr))
#define ffs_bit(addr)		BIT_FFS(QP_SETSIZE, (addr))

#define KTR_NTB KTR_SPARE3

#define NTB_TRANSPORT_VERSION	4

static SYSCTL_NODE(_hw, OID_AUTO, ntb_transport, CTLFLAG_RW, 0, "ntb_transport");

static unsigned g_ntb_transport_debug_level;
TUNABLE_INT("hw.ntb_transport.debug_level", &g_ntb_transport_debug_level);
SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_transport_debug_level, 0,
    "ntb_transport log level -- higher is more verbose");
#define ntb_printf(lvl, ...) do {			\
	if ((lvl) <= g_ntb_transport_debug_level) {	\
		printf(__VA_ARGS__);			\
	}						\
} while (0)
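
/*
 * For example, verbose logging can be raised at runtime with
 * "sysctl hw.ntb_transport.debug_level=1"; since the OID is CTLFLAG_RWTUN
 * it can also be preset from loader.conf(5).
 */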

static unsigned transport_mtu = 0x10000;

static uint64_t max_mw_size;
TUNABLE_QUAD("hw.ntb_transport.max_mw_size", &max_mw_size);
SYSCTL_UQUAD(_hw_ntb_transport, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
    "If enabled (non-zero), limit the size of large memory windows. "
    "Both sides of the NTB MUST set the same value here.");

static unsigned max_num_clients;
TUNABLE_INT("hw.ntb_transport.max_num_clients", &max_num_clients);
SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, max_num_clients, CTLFLAG_RDTUN,
    &max_num_clients, 0, "Maximum number of NTB transport clients.  "
    "0 (default) - use all available NTB memory windows; "
    "positive integer N - Limit to N memory windows.");

static unsigned enable_xeon_watchdog;
TUNABLE_INT("hw.ntb_transport.enable_xeon_watchdog", &enable_xeon_watchdog);
SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
    &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
    "keep a watchdog from tearing down the NTB link");

STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);

typedef uint32_t ntb_q_idx_t;

struct ntb_queue_entry {
	/* ntb_queue list reference */
	STAILQ_ENTRY(ntb_queue_entry) entry;

	/* info on data to be transferred */
	void		*cb_data;
	void		*buf;
	uint32_t	len;
	uint32_t	flags;

	struct ntb_transport_qp		*qp;
	struct ntb_payload_header	*x_hdr;
	ntb_q_idx_t	index;
};

struct ntb_rx_info {
	ntb_q_idx_t	entry;
};

struct ntb_transport_qp {
	struct ntb_transport_ctx	*transport;
	device_t		 ntb;

	void			*cb_data;

	bool			client_ready;
	volatile bool		link_is_up;
	uint8_t			qp_num;	/* Only 64 QPs are allowed.  0-63 */

	struct ntb_rx_info	*rx_info;
	struct ntb_rx_info	*remote_rx_info;

	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
	    void *data, int len);
	struct ntb_queue_list	tx_free_q;
	struct mtx		ntb_tx_free_q_lock;
	caddr_t			tx_mw;
	bus_addr_t		tx_mw_phys;
	ntb_q_idx_t		tx_index;
	ntb_q_idx_t		tx_max_entry;
	uint64_t		tx_max_frame;

	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
	    void *data, int len);
	struct ntb_queue_list	rx_post_q;
	struct ntb_queue_list	rx_pend_q;
	/* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
	struct mtx		ntb_rx_q_lock;
	struct task		rx_completion_task;
	struct task		rxc_db_work;
	caddr_t			rx_buff;
	ntb_q_idx_t		rx_index;
	ntb_q_idx_t		rx_max_entry;
	uint64_t		rx_max_frame;

	void (*event_handler)(void *data, enum ntb_link_event status);
	struct callout		link_work;
	struct callout		rx_full;

	uint64_t		last_rx_no_buf;

	/* Stats */
	uint64_t		rx_bytes;
	uint64_t		rx_pkts;
	uint64_t		rx_ring_empty;
	uint64_t		rx_err_no_buf;
	uint64_t		rx_err_oflow;
	uint64_t		rx_err_ver;
	uint64_t		tx_bytes;
	uint64_t		tx_pkts;
	uint64_t		tx_ring_full;
	uint64_t		tx_err_no_buf;
};

struct ntb_transport_mw {
	vm_paddr_t	phys_addr;
	size_t		phys_size;
	size_t		xlat_align;
	size_t		xlat_align_size;
	bus_addr_t	addr_limit;
	/* Tx buff is off vbase / phys_addr */
	caddr_t		vbase;
	size_t		xlat_size;
	size_t		buff_size;
	/* Rx buff is off virt_addr / dma_addr */
	caddr_t		virt_addr;
	bus_addr_t	dma_addr;
};

struct ntb_transport_ctx {
	device_t		 ntb;
	struct ntb_transport_mw	*mw_vec;
	struct ntb_transport_qp	*qp_vec;
	struct _qpset		qp_bitmap;
	struct _qpset		qp_bitmap_free;
	unsigned		mw_count;
	unsigned		qp_count;
	volatile bool		link_is_up;
	struct callout		link_work;
	struct callout		link_watchdog;
	struct task		link_cleanup;
	struct mtx		tx_lock;
	struct mtx		rx_lock;
};

enum {
	NTBT_DESC_DONE_FLAG = 1 << 0,
	NTBT_LINK_DOWN_FLAG = 1 << 1,
};

struct ntb_payload_header {
	ntb_q_idx_t ver;
	uint32_t len;
	uint32_t flags;
};

enum {
	/*
	 * The order of this enum is part of the remote protocol.  Do not
	 * reorder without bumping the protocol version (and it's probably
	 * best to keep the protocol in lock-step with the Linux NTB driver).
	 */
	NTBT_VERSION = 0,
	NTBT_QP_LINKS,
	NTBT_NUM_QPS,
	NTBT_NUM_MWS,
	/*
	 * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
	 */
	NTBT_MW0_SZ_HIGH,
	NTBT_MW0_SZ_LOW,
	NTBT_MW1_SZ_HIGH,
	NTBT_MW1_SZ_LOW,
	NTBT_MAX_SPAD,

	/*
	 * Some NTB-using hardware has a watchdog to work around NTB hangs; if
	 * a register or doorbell isn't written every few seconds, the link is
	 * torn down.  Write an otherwise unused register every few seconds to
	 * work around this watchdog.
	 */
	NTBT_WATCHDOG_SPAD = 15
};
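
/*
 * Illustration of the size exchange implied by the enum above: the size of
 * memory window i is published in the scratchpad pair NTBT_MW0_SZ_HIGH +
 * (i * 2) and NTBT_MW0_SZ_LOW + (i * 2), which is why the MW1 values must
 * sit exactly two slots above the MW0 values.  E.g., a 4 GiB window at
 * i = 1 is written as NTBT_MW1_SZ_HIGH = 0x1, NTBT_MW1_SZ_LOW = 0x0.
 */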

#define QP_TO_MW(nt, qp)	((qp) % (nt)->mw_count)
#define NTB_QP_DEF_NUM_ENTRIES	100
#define NTB_LINK_DOWN_TIMEOUT	10

static int ntb_transport_probe(device_t dev);
static int ntb_transport_attach(device_t dev);
static int ntb_transport_detach(device_t dev);
static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
    unsigned int qp_num);
static int ntb_process_tx(struct ntb_transport_qp *qp,
    struct ntb_queue_entry *entry);
static void ntb_memcpy_tx(struct ntb_transport_qp *qp,
    struct ntb_queue_entry *entry, void *offset);
static void ntb_transport_rxc_db(void *arg, int pending);
static int ntb_process_rxc(struct ntb_transport_qp *qp);
static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
    struct ntb_queue_entry *entry, void *offset);
static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
    void *data);
static void ntb_complete_rxc(void *arg, int pending);
static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
static void ntb_transport_event_callback(void *data);
static void ntb_transport_link_work(void *arg);
static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
    unsigned int qp_num);
static void ntb_qp_link_work(void *arg);
static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
static void ntb_transport_link_cleanup_work(void *, int);
static void ntb_qp_link_down(struct ntb_transport_qp *qp);
static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
static void ntb_send_link_down(struct ntb_transport_qp *qp);
static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
    struct ntb_queue_list *list);
static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
    struct ntb_queue_list *list);
static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
    struct ntb_queue_list *from, struct ntb_queue_list *to);
static void xeon_link_watchdog_hb(void *);

static const struct ntb_ctx_ops ntb_transport_ops = {
	.link_event = ntb_transport_event_callback,
	.db_event = ntb_transport_doorbell_callback,
};

MALLOC_DEFINE(M_NTB_T, "ntb_transport", "ntb transport driver");

static inline void
iowrite32(uint32_t val, void *addr)
{

	bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
	    val);
}

/* Transport Init and teardown */

static void
xeon_link_watchdog_hb(void *arg)
{
	struct ntb_transport_ctx *nt;

	nt = arg;
	NTB_SPAD_WRITE(nt->ntb, NTBT_WATCHDOG_SPAD, 0);
	callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
}

static int
ntb_transport_probe(device_t dev)
{

	device_set_desc(dev, "NTB Transport");
	return (0);
}

static int
ntb_transport_attach(device_t dev)
{
	struct ntb_transport_ctx *nt = device_get_softc(dev);
	device_t ntb = device_get_parent(dev);
	struct ntb_transport_mw *mw;
	uint64_t qp_bitmap;
	int rc;
	unsigned i;

	nt->ntb = ntb;
	nt->mw_count = NTB_MW_COUNT(ntb);
	nt->mw_vec = malloc(nt->mw_count * sizeof(*nt->mw_vec), M_NTB_T,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < nt->mw_count; i++) {
		mw = &nt->mw_vec[i];

		rc = NTB_MW_GET_RANGE(ntb, i, &mw->phys_addr, &mw->vbase,
		    &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
		    &mw->addr_limit);
		if (rc != 0)
			goto err;

		mw->buff_size = 0;
		mw->xlat_size = 0;
		mw->virt_addr = NULL;
		mw->dma_addr = 0;

		rc = NTB_MW_SET_WC(nt->ntb, i, VM_MEMATTR_WRITE_COMBINING);
		if (rc)
			ntb_printf(0, "Unable to set mw%d caching\n", i);
	}

	qp_bitmap = NTB_DB_VALID_MASK(ntb);
	nt->qp_count = flsll(qp_bitmap);
	KASSERT(nt->qp_count != 0, ("bogus db bitmap"));
	nt->qp_count -= 1;

	if (max_num_clients != 0 && max_num_clients < nt->qp_count)
		nt->qp_count = max_num_clients;
	else if (nt->mw_count < nt->qp_count)
		nt->qp_count = nt->mw_count;
	KASSERT(nt->qp_count <= QP_SETSIZE, ("invalid qp_count"));

	mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF);
	mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF);

	nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_T,
	    M_WAITOK | M_ZERO);

	for (i = 0; i < nt->qp_count; i++) {
		set_bit(i, &nt->qp_bitmap);
		set_bit(i, &nt->qp_bitmap_free);
		ntb_transport_init_queue(nt, i);
	}

	callout_init(&nt->link_work, 0);
	callout_init(&nt->link_watchdog, 0);
	TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);

	rc = NTB_SET_CTX(ntb, nt, &ntb_transport_ops);
	if (rc != 0)
		goto err;

	nt->link_is_up = false;
	NTB_LINK_ENABLE(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
	if (enable_xeon_watchdog != 0)
		callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);

	/* Attach children to this transport */
	device_add_child(dev, NULL, -1);
	bus_generic_attach(dev);

	return (0);

err:
	free(nt->qp_vec, M_NTB_T);
	free(nt->mw_vec, M_NTB_T);
	return (rc);
}

static int
ntb_transport_detach(device_t dev)
{
	struct ntb_transport_ctx *nt = device_get_softc(dev);
	device_t ntb = nt->ntb;
	struct _qpset qp_bitmap_alloc;
	uint8_t i;

	/* Detach & delete all children */
	device_delete_children(dev);

	ntb_transport_link_cleanup(nt);
	taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
	callout_drain(&nt->link_work);
	callout_drain(&nt->link_watchdog);

	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
	BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);

	/* Verify that all the QPs are freed */
	for (i = 0; i < nt->qp_count; i++)
		if (test_bit(i, &qp_bitmap_alloc))
			ntb_transport_free_queue(&nt->qp_vec[i]);

	NTB_LINK_DISABLE(ntb);
	NTB_CLEAR_CTX(ntb);

	for (i = 0; i < nt->mw_count; i++)
		ntb_free_mw(nt, i);

	free(nt->qp_vec, M_NTB_T);
	free(nt->mw_vec, M_NTB_T);
	return (0);
}

static void
ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
{
	struct ntb_transport_mw *mw;
	struct ntb_transport_qp *qp;
	vm_paddr_t mw_base;
	uint64_t mw_size, qp_offset;
	size_t tx_size;
	unsigned num_qps_mw, mw_num, mw_count;

	mw_count = nt->mw_count;
	mw_num = QP_TO_MW(nt, qp_num);
	mw = &nt->mw_vec[mw_num];

	qp = &nt->qp_vec[qp_num];
	qp->qp_num = qp_num;
	qp->transport = nt;
	qp->ntb = nt->ntb;
	qp->client_ready = false;
	qp->event_handler = NULL;
	ntb_qp_link_down_reset(qp);

	if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
		num_qps_mw = nt->qp_count / mw_count + 1;
	else
		num_qps_mw = nt->qp_count / mw_count;

	mw_base = mw->phys_addr;
	mw_size = mw->phys_size;

	tx_size = mw_size / num_qps_mw;
	qp_offset = tx_size * (qp_num / mw_count);

	qp->tx_mw = mw->vbase + qp_offset;
	KASSERT(qp->tx_mw != NULL, ("uh oh?"));

	/* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
	qp->tx_mw_phys = mw_base + qp_offset;
	KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));

	tx_size -= sizeof(struct ntb_rx_info);
	qp->rx_info = (void *)(qp->tx_mw + tx_size);

	/* Due to house-keeping, there must be at least 2 buffs */
	qp->tx_max_frame = qmin(tx_size / 2,
	    transport_mtu + sizeof(struct ntb_payload_header));
	qp->tx_max_entry = tx_size / qp->tx_max_frame;

	callout_init(&qp->link_work, 0);
	callout_init(&qp->rx_full, 1);

	mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
	mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
	TASK_INIT(&qp->rx_completion_task, 0, ntb_complete_rxc, qp);
	TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);

	STAILQ_INIT(&qp->rx_post_q);
	STAILQ_INIT(&qp->rx_pend_q);
	STAILQ_INIT(&qp->tx_free_q);

	callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
}
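
/*
 * Worked example of the sizing above (illustrative numbers only): with a
 * single QP on a 1 MiB window, tx_size = 1048576 - sizeof(struct
 * ntb_rx_info) = 1048572 bytes.  tx_max_frame = min(1048572 / 2,
 * 65536 + 12) = 65548 bytes per slot, giving tx_max_entry =
 * 1048572 / 65548 = 15 ring slots.
 */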

void
ntb_transport_free_queue(struct ntb_transport_qp *qp)
{
	struct ntb_queue_entry *entry;

	if (qp == NULL)
		return;

	callout_drain(&qp->link_work);

	NTB_DB_SET_MASK(qp->ntb, 1ull << qp->qp_num);
	taskqueue_drain(taskqueue_swi, &qp->rxc_db_work);
	taskqueue_drain(taskqueue_swi, &qp->rx_completion_task);

	qp->cb_data = NULL;
	qp->rx_handler = NULL;
	qp->tx_handler = NULL;
	qp->event_handler = NULL;

	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
		free(entry, M_NTB_T);

	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
		free(entry, M_NTB_T);

	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
		free(entry, M_NTB_T);

	set_bit(qp->qp_num, &qp->transport->qp_bitmap_free);
}

/**
 * ntb_transport_create_queue - Create a new NTB transport layer queue
 * @rx_handler: receive callback function
 * @tx_handler: transmit callback function
 * @event_handler: event callback function
 *
 * Create a new NTB transport layer queue and provide the queue with a callback
 * routine for both transmit and receive.  The receive callback routine will be
 * used to pass up data when the transport has received it on the queue.  The
 * transmit callback routine will be called when the transport has completed the
 * transmission of the data on the queue and the data is ready to be freed.
 *
 * RETURNS: pointer to newly created ntb_queue, NULL on error.
 */
struct ntb_transport_qp *
ntb_transport_create_queue(void *data, device_t dev,
    const struct ntb_queue_handlers *handlers)
{
	struct ntb_transport_ctx *nt = device_get_softc(dev);
	device_t ntb = device_get_parent(dev);
	struct ntb_queue_entry *entry;
	struct ntb_transport_qp *qp;
	unsigned int free_queue;
	int i;

	free_queue = ffs_bit(&nt->qp_bitmap_free);
	if (free_queue == 0)
		return (NULL);

	/* decrement free_queue to make it zero based */
	free_queue--;

	qp = &nt->qp_vec[free_queue];
	clear_bit(qp->qp_num, &nt->qp_bitmap_free);
	qp->cb_data = data;
	qp->rx_handler = handlers->rx_handler;
	qp->tx_handler = handlers->tx_handler;
	qp->event_handler = handlers->event_handler;

	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
		entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
		entry->cb_data = data;
		entry->buf = NULL;
		entry->len = transport_mtu;
		ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
	}

	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
		entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
	}

	NTB_DB_CLEAR(ntb, 1ull << qp->qp_num);
	NTB_DB_CLEAR_MASK(ntb, 1ull << qp->qp_num);
	return (qp);
}
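
/*
 * Sketch of a client's bring-up sequence (illustrative only; the callback
 * names are hypothetical, but this mirrors how if_ntb(4) uses the API):
 *
 *	static const struct ntb_queue_handlers handlers = {
 *		.rx_handler = my_rx_pkt,
 *		.tx_handler = my_tx_done,
 *		.event_handler = my_link_event,
 *	};
 *
 *	qp = ntb_transport_create_queue(sc, dev, &handlers);
 *	if (qp == NULL)
 *		return (ENXIO);
 *	ntb_transport_link_up(qp);
 */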

/**
 * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
 * @qp: NTB transport layer queue to be enabled
 *
 * Notify NTB transport layer of client readiness to use queue
 */
void
ntb_transport_link_up(struct ntb_transport_qp *qp)
{
	struct ntb_transport_ctx *nt = qp->transport;

	qp->client_ready = true;

	ntb_printf(2, "qp client ready\n");

	if (nt->link_is_up)
		callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
}

/* Transport Tx */

/**
 * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
 * @qp: NTB transport layer queue the entry is to be enqueued on
 * @cb: per buffer pointer for callback function to use
 * @data: pointer to data buffer that will be sent
 * @len: length of the data buffer
 *
 * Enqueue a new transmit buffer onto the transport queue from which an NTB
 * payload will be transmitted.  This assumes that a lock is being held to
 * serialize access to the qp.
 *
 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
 */
int
ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
    unsigned int len)
{
	struct ntb_queue_entry *entry;
	int rc;

	if (qp == NULL || !qp->link_is_up || len == 0) {
		CTR0(KTR_NTB, "TX: link not up");
		return (EINVAL);
	}

	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
	if (entry == NULL) {
		CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
		qp->tx_err_no_buf++;
		return (EBUSY);
	}
	CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);

	entry->cb_data = cb;
	entry->buf = data;
	entry->len = len;
	entry->flags = 0;

	mtx_lock(&qp->transport->tx_lock);
	rc = ntb_process_tx(qp, entry);
	mtx_unlock(&qp->transport->tx_lock);
	if (rc != 0) {
		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
		CTR1(KTR_NTB,
		    "TX: process_tx failed. Returning entry %p to tx_free_q",
		    entry);
	}
	return (rc);
}
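
/*
 * Illustrative call (names hypothetical): a client transmitting an mbuf
 * chain "m" might do the following, keeping ownership on failure:
 *
 *	rc = ntb_transport_tx_enqueue(qp, m, m, m_length(m, NULL));
 *	if (rc != 0)
 *		m_freem(m);
 *
 * On success the transport copies the chain into the ring and either hands
 * it to tx_handler or frees it itself (see ntb_memcpy_tx() below).
 */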

static int
ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
{
	void *offset;

	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
	CTR3(KTR_NTB,
	    "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
	    qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
	if (qp->tx_index == qp->remote_rx_info->entry) {
		CTR0(KTR_NTB, "TX: ring full");
		qp->tx_ring_full++;
		return (EAGAIN);
	}

	if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
		if (qp->tx_handler != NULL)
			qp->tx_handler(qp, qp->cb_data, entry->buf,
			    EIO);
		else
			m_freem(entry->buf);

		entry->buf = NULL;
		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
		CTR1(KTR_NTB,
		    "TX: frame too big. returning entry %p to tx_free_q",
		    entry);
		return (0);
	}
	CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset);
	ntb_memcpy_tx(qp, entry, offset);

	qp->tx_index++;
	qp->tx_index %= qp->tx_max_entry;

	qp->tx_pkts++;

	return (0);
}

static void
ntb_memcpy_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
    void *offset)
{
	struct ntb_payload_header *hdr;

	/* This piece is from Linux' ntb_async_tx() */
	hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
	    sizeof(struct ntb_payload_header));
	entry->x_hdr = hdr;
	iowrite32(entry->len, &hdr->len);
	iowrite32(qp->tx_pkts, &hdr->ver);

	/* This piece is ntb_memcpy_tx() */
	CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
	if (entry->buf != NULL) {
		m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);

		/*
		 * Ensure that the data is fully copied before setting the
		 * flags
		 */
		wmb();
	}

	/* The rest is ntb_tx_copy_callback() */
	iowrite32(entry->flags | NTBT_DESC_DONE_FLAG, &hdr->flags);
	CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);

	NTB_PEER_DB_SET(qp->ntb, 1ull << qp->qp_num);

	/*
	 * The entry length can only be zero if the packet is intended to be a
	 * "link down" or similar.  Since no payload is being sent in these
	 * cases, there is nothing to add to the completion queue.
	 */
	if (entry->len > 0) {
		qp->tx_bytes += entry->len;

		if (qp->tx_handler)
			qp->tx_handler(qp, qp->cb_data, entry->buf,
			    entry->len);
		else
			m_freem(entry->buf);
		entry->buf = NULL;
	}

	CTR3(KTR_NTB,
	    "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
	    "to tx_free_q", entry, hdr->ver, hdr->flags);
	ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
}
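
/*
 * Layout of one tx_max_frame-sized ring slot, as written above (12 =
 * sizeof(struct ntb_payload_header)):
 *
 *	0                       tx_max_frame - 12      tx_max_frame
 *	+-----------------------+----------------------+
 *	| payload (len bytes)   | ver | len | flags    |
 *	+-----------------------+----------------------+
 *
 * The trailing header is written last, so DESC_DONE only becomes visible
 * to the peer after the payload copy (note the wmb() above).
 */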

/* Transport Rx */
static void
ntb_transport_rxc_db(void *arg, int pending __unused)
{
	struct ntb_transport_qp *qp = arg;
	ntb_q_idx_t i;
	int rc;

	/*
	 * Limit the number of packets processed in a single interrupt to
	 * provide fairness to others
	 */
	CTR0(KTR_NTB, "RX: transport_rx");
	mtx_lock(&qp->transport->rx_lock);
	for (i = 0; i < qp->rx_max_entry; i++) {
		rc = ntb_process_rxc(qp);
		if (rc != 0) {
			CTR0(KTR_NTB, "RX: process_rxc failed");
			break;
		}
	}
	mtx_unlock(&qp->transport->rx_lock);

	if (i == qp->rx_max_entry)
		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
	else if ((NTB_DB_READ(qp->ntb) & (1ull << qp->qp_num)) != 0) {
		/* If db is set, clear it and read it back to commit clear. */
		NTB_DB_CLEAR(qp->ntb, 1ull << qp->qp_num);
		(void)NTB_DB_READ(qp->ntb);

		/*
		 * An interrupt may have arrived between finishing
		 * ntb_process_rxc and clearing the doorbell bit: there might
		 * be some more work to do.
		 */
		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
	}
}

static int
ntb_process_rxc(struct ntb_transport_qp *qp)
{
	struct ntb_payload_header *hdr;
	struct ntb_queue_entry *entry;
	caddr_t offset;

	offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
	hdr = (void *)(offset + qp->rx_max_frame -
	    sizeof(struct ntb_payload_header));

	CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
	if ((hdr->flags & NTBT_DESC_DONE_FLAG) == 0) {
		CTR0(KTR_NTB, "RX: hdr not done");
		qp->rx_ring_empty++;
		return (EAGAIN);
	}

	if ((hdr->flags & NTBT_LINK_DOWN_FLAG) != 0) {
		CTR0(KTR_NTB, "RX: link down");
		ntb_qp_link_down(qp);
		hdr->flags = 0;
		return (EAGAIN);
	}

	if (hdr->ver != (uint32_t)qp->rx_pkts) {
		CTR2(KTR_NTB, "RX: ver != rx_pkts (%x != %lx). "
		    "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
		qp->rx_err_ver++;
		return (EIO);
	}

	entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
	if (entry == NULL) {
		qp->rx_err_no_buf++;
		CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
		return (EAGAIN);
	}
	callout_stop(&qp->rx_full);
	CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);

	entry->x_hdr = hdr;
	entry->index = qp->rx_index;

	if (hdr->len > entry->len) {
		CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
		    (uintmax_t)hdr->len, (uintmax_t)entry->len);
		qp->rx_err_oflow++;

		entry->len = -EIO;
		entry->flags |= NTBT_DESC_DONE_FLAG;

		taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
	} else {
		qp->rx_bytes += hdr->len;
		qp->rx_pkts++;

		CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);

		entry->len = hdr->len;

		ntb_memcpy_rx(qp, entry, offset);
	}

	qp->rx_index++;
	qp->rx_index %= qp->rx_max_entry;
	return (0);
}

static void
ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
    void *offset)
{
	struct ifnet *ifp = entry->cb_data;
	unsigned int len = entry->len;

	CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);

	entry->buf = (void *)m_devget(offset, len, 0, ifp, NULL);

	/* Ensure that the data is globally visible before clearing the flag */
	wmb();

	CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, entry->buf);
	ntb_rx_copy_callback(qp, entry);
}

static inline void
ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
{
	struct ntb_queue_entry *entry;

	entry = data;
	entry->flags |= NTBT_DESC_DONE_FLAG;
	taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
}

static void
ntb_complete_rxc(void *arg, int pending)
{
	struct ntb_transport_qp *qp = arg;
	struct ntb_queue_entry *entry;
	struct mbuf *m;
	unsigned len;

	CTR0(KTR_NTB, "RX: rx_completion_task");

	mtx_lock_spin(&qp->ntb_rx_q_lock);

	while (!STAILQ_EMPTY(&qp->rx_post_q)) {
		entry = STAILQ_FIRST(&qp->rx_post_q);
		if ((entry->flags & NTBT_DESC_DONE_FLAG) == 0)
			break;

		entry->x_hdr->flags = 0;
		iowrite32(entry->index, &qp->rx_info->entry);

		STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);

		len = entry->len;
		m = entry->buf;

		/*
		 * Re-initialize queue_entry for reuse; rx_handler takes
		 * ownership of the mbuf.
		 */
		entry->buf = NULL;
		entry->len = transport_mtu;
		entry->cb_data = qp->cb_data;

		STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);

		mtx_unlock_spin(&qp->ntb_rx_q_lock);

		CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
		if (qp->rx_handler != NULL && qp->client_ready)
			qp->rx_handler(qp, qp->cb_data, m, len);
		else
			m_freem(m);

		mtx_lock_spin(&qp->ntb_rx_q_lock);
	}

	mtx_unlock_spin(&qp->ntb_rx_q_lock);
}

static void
ntb_transport_doorbell_callback(void *data, uint32_t vector)
{
	struct ntb_transport_ctx *nt = data;
	struct ntb_transport_qp *qp;
	struct _qpset db_bits;
	uint64_t vec_mask;
	unsigned qp_num;

	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits);
	BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free);

	vec_mask = NTB_DB_VECTOR_MASK(nt->ntb, vector);
	while (vec_mask != 0) {
		qp_num = ffsll(vec_mask) - 1;

		if (test_bit(qp_num, &db_bits)) {
			qp = &nt->qp_vec[qp_num];
			taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
		}

		vec_mask &= ~(1ull << qp_num);
	}
}

/* Link Event handler */
static void
ntb_transport_event_callback(void *data)
{
	struct ntb_transport_ctx *nt = data;

	if (NTB_LINK_IS_UP(nt->ntb, NULL, NULL)) {
		ntb_printf(1, "HW link up\n");
		callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
	} else {
		ntb_printf(1, "HW link down\n");
		taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
	}
}

/* Link bring up */
static void
ntb_transport_link_work(void *arg)
{
	struct ntb_transport_ctx *nt = arg;
	device_t ntb = nt->ntb;
	struct ntb_transport_qp *qp;
	uint64_t val64, size;
	uint32_t val;
	unsigned i;
	int rc;

	/* send the local info, in the opposite order of the way we read it */
	for (i = 0; i < nt->mw_count; i++) {
		size = nt->mw_vec[i].phys_size;

		if (max_mw_size != 0 && size > max_mw_size)
			size = max_mw_size;

		NTB_PEER_SPAD_WRITE(ntb, NTBT_MW0_SZ_HIGH + (i * 2),
		    size >> 32);
		NTB_PEER_SPAD_WRITE(ntb, NTBT_MW0_SZ_LOW + (i * 2), size);
	}

	NTB_PEER_SPAD_WRITE(ntb, NTBT_NUM_MWS, nt->mw_count);

	NTB_PEER_SPAD_WRITE(ntb, NTBT_NUM_QPS, nt->qp_count);

	NTB_PEER_SPAD_WRITE(ntb, NTBT_VERSION, NTB_TRANSPORT_VERSION);

	/* Query the remote side for its info */
	val = 0;
	NTB_SPAD_READ(ntb, NTBT_VERSION, &val);
	if (val != NTB_TRANSPORT_VERSION)
		goto out;

	NTB_SPAD_READ(ntb, NTBT_NUM_QPS, &val);
	if (val != nt->qp_count)
		goto out;

	NTB_SPAD_READ(ntb, NTBT_NUM_MWS, &val);
	if (val != nt->mw_count)
		goto out;

	for (i = 0; i < nt->mw_count; i++) {
		NTB_SPAD_READ(ntb, NTBT_MW0_SZ_HIGH + (i * 2), &val);
		val64 = (uint64_t)val << 32;

		NTB_SPAD_READ(ntb, NTBT_MW0_SZ_LOW + (i * 2), &val);
		val64 |= val;

		rc = ntb_set_mw(nt, i, val64);
		if (rc != 0)
			goto free_mws;
	}

	nt->link_is_up = true;
	ntb_printf(1, "transport link up\n");

	for (i = 0; i < nt->qp_count; i++) {
		qp = &nt->qp_vec[i];

		ntb_transport_setup_qp_mw(nt, i);

		if (qp->client_ready)
			callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
	}

	return;

free_mws:
	for (i = 0; i < nt->mw_count; i++)
		ntb_free_mw(nt, i);
out:
	if (NTB_LINK_IS_UP(ntb, NULL, NULL))
		callout_reset(&nt->link_work,
		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
}

static int
ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
{
	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
	size_t xlat_size, buff_size;
	int rc;

	if (size == 0)
		return (EINVAL);

	xlat_size = roundup(size, mw->xlat_align_size);
	buff_size = xlat_size;

	/* No need to re-setup */
	if (mw->xlat_size == xlat_size)
		return (0);

	if (mw->buff_size != 0)
		ntb_free_mw(nt, num_mw);

	/* Alloc memory for receiving data.  Must be aligned */
	mw->xlat_size = xlat_size;
	mw->buff_size = buff_size;

	mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_T, M_ZERO, 0,
	    mw->addr_limit, mw->xlat_align, 0);
	if (mw->virt_addr == NULL) {
		ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
		    mw->buff_size, mw->xlat_size);
		mw->xlat_size = 0;
		mw->buff_size = 0;
		return (ENOMEM);
	}
	/* TODO: replace with bus_space_* functions */
	mw->dma_addr = vtophys(mw->virt_addr);

	/*
	 * Ensure that the allocation from contigmalloc is aligned as
	 * requested.  XXX: This may not be needed -- brought in for parity
	 * with the Linux driver.
	 */
	if (mw->dma_addr % mw->xlat_align != 0) {
		ntb_printf(0,
		    "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
		    (uintmax_t)mw->dma_addr, size);
		ntb_free_mw(nt, num_mw);
		return (ENOMEM);
	}

	/* Notify HW the memory location of the receive buffer */
	rc = NTB_MW_SET_TRANS(nt->ntb, num_mw, mw->dma_addr, mw->xlat_size);
	if (rc) {
		ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
		ntb_free_mw(nt, num_mw);
		return (rc);
	}

	return (0);
}

static void
ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
{
	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];

	if (mw->virt_addr == NULL)
		return;

	NTB_MW_CLEAR_TRANS(nt->ntb, num_mw);
	contigfree(mw->virt_addr, mw->xlat_size, M_NTB_T);
	mw->xlat_size = 0;
	mw->buff_size = 0;
	mw->virt_addr = NULL;
}

static int
ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
{
	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
	struct ntb_transport_mw *mw;
	void *offset;
	ntb_q_idx_t i;
	size_t rx_size;
	unsigned num_qps_mw, mw_num, mw_count;

	mw_count = nt->mw_count;
	mw_num = QP_TO_MW(nt, qp_num);
	mw = &nt->mw_vec[mw_num];

	if (mw->virt_addr == NULL)
		return (ENOMEM);

	if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
		num_qps_mw = nt->qp_count / mw_count + 1;
	else
		num_qps_mw = nt->qp_count / mw_count;

	rx_size = mw->xlat_size / num_qps_mw;
	qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
	rx_size -= sizeof(struct ntb_rx_info);

	qp->remote_rx_info = (void *)(qp->rx_buff + rx_size);

	/* Due to house-keeping, there must be at least 2 buffs */
	qp->rx_max_frame = qmin(rx_size / 2,
	    transport_mtu + sizeof(struct ntb_payload_header));
	qp->rx_max_entry = rx_size / qp->rx_max_frame;
	qp->rx_index = 0;

	qp->remote_rx_info->entry = qp->rx_max_entry - 1;

	/* Set up the hdr offsets with 0s */
	for (i = 0; i < qp->rx_max_entry; i++) {
		offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
		    sizeof(struct ntb_payload_header));
		memset(offset, 0, sizeof(struct ntb_payload_header));
	}

	qp->rx_pkts = 0;
	qp->tx_pkts = 0;
	qp->tx_index = 0;

	return (0);
}
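
/*
 * Note on flow control: qp->remote_rx_info->entry caches the index the
 * peer last consumed (the peer updates it through its qp->rx_info, which
 * aliases this location across the NTB).  ntb_process_tx() stalls when
 * tx_index catches up with it, so seeding it with rx_max_entry - 1 above
 * presents an entirely empty ring to the local transmitter.
 */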

static void
ntb_qp_link_work(void *arg)
{
	struct ntb_transport_qp *qp = arg;
	device_t ntb = qp->ntb;
	struct ntb_transport_ctx *nt = qp->transport;
	uint32_t val, dummy;

	NTB_SPAD_READ(ntb, NTBT_QP_LINKS, &val);

	NTB_PEER_SPAD_WRITE(ntb, NTBT_QP_LINKS, val | (1ull << qp->qp_num));

	/* query remote spad for qp ready bits */
	NTB_PEER_SPAD_READ(ntb, NTBT_QP_LINKS, &dummy);

	/* See if the remote side is up */
	if ((val & (1ull << qp->qp_num)) != 0) {
		ntb_printf(2, "qp link up\n");
		qp->link_is_up = true;

		if (qp->event_handler != NULL)
			qp->event_handler(qp->cb_data, NTB_LINK_UP);

		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
	} else if (nt->link_is_up)
		callout_reset(&qp->link_work,
		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
}

/* Link down event */
static void
ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
{
	struct ntb_transport_qp *qp;
	struct _qpset qp_bitmap_alloc;
	unsigned i;

	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
	BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);

	/* Pass along the info to any clients */
	for (i = 0; i < nt->qp_count; i++)
		if (test_bit(i, &qp_bitmap_alloc)) {
			qp = &nt->qp_vec[i];
			ntb_qp_link_cleanup(qp);
			callout_drain(&qp->link_work);
		}

	if (!nt->link_is_up)
		callout_drain(&nt->link_work);

	/*
	 * The scratchpad registers keep the values if the remote side
	 * goes down, blast them now to give them a sane value the next
	 * time they are accessed
	 */
	for (i = 0; i < NTBT_MAX_SPAD; i++)
		NTB_SPAD_WRITE(nt->ntb, i, 0);
}

static void
ntb_transport_link_cleanup_work(void *arg, int pending __unused)
{

	ntb_transport_link_cleanup(arg);
}

static void
ntb_qp_link_down(struct ntb_transport_qp *qp)
{

	ntb_qp_link_cleanup(qp);
}

static void
ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
{

	qp->link_is_up = false;

	qp->tx_index = qp->rx_index = 0;
	qp->tx_bytes = qp->rx_bytes = 0;
	qp->tx_pkts = qp->rx_pkts = 0;

	qp->rx_ring_empty = 0;
	qp->tx_ring_full = 0;

	qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
	qp->rx_err_oflow = qp->rx_err_ver = 0;
}

static void
ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
{
	struct ntb_transport_ctx *nt = qp->transport;

	callout_drain(&qp->link_work);
	ntb_qp_link_down_reset(qp);

	if (qp->event_handler != NULL)
		qp->event_handler(qp->cb_data, NTB_LINK_DOWN);

	if (nt->link_is_up)
		callout_reset(&qp->link_work,
		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
}

/* Link commanded down */
/**
 * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
 * @qp: NTB transport layer queue to be disabled
 *
 * Notify NTB transport layer of client's desire to no longer receive data on
 * transport queue specified.  It is the client's responsibility to ensure all
 * entries on queue are purged or otherwise handled appropriately.
 */
void
ntb_transport_link_down(struct ntb_transport_qp *qp)
{
	uint32_t val;

	if (qp == NULL)
		return;

	qp->client_ready = false;

	NTB_SPAD_READ(qp->ntb, NTBT_QP_LINKS, &val);

	NTB_PEER_SPAD_WRITE(qp->ntb, NTBT_QP_LINKS,
	   val & ~(1 << qp->qp_num));

	if (qp->link_is_up)
		ntb_send_link_down(qp);
	else
		callout_drain(&qp->link_work);
}

/**
 * ntb_transport_link_query - Query transport link state
 * @qp: NTB transport layer queue to be queried
 *
 * Query connectivity to the remote system of the NTB transport queue
 *
 * RETURNS: true for link up or false for link down
 */
bool
ntb_transport_link_query(struct ntb_transport_qp *qp)
{

	if (qp == NULL)
		return (false);

	return (qp->link_is_up);
}

static void
ntb_send_link_down(struct ntb_transport_qp *qp)
{
	struct ntb_queue_entry *entry;
	int i, rc;

	if (!qp->link_is_up)
		return;

	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
		if (entry != NULL)
			break;
		pause("NTB Wait for link down", hz / 10);
	}

	if (entry == NULL)
		return;

	entry->cb_data = NULL;
	entry->buf = NULL;
	entry->len = 0;
	entry->flags = NTBT_LINK_DOWN_FLAG;

	mtx_lock(&qp->transport->tx_lock);
	rc = ntb_process_tx(qp, entry);
	if (rc != 0)
		printf("ntb: Failed to send link down\n");
	mtx_unlock(&qp->transport->tx_lock);

	ntb_qp_link_down_reset(qp);
}

/* List Management */

static void
ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
    struct ntb_queue_list *list)
{

	mtx_lock_spin(lock);
	STAILQ_INSERT_TAIL(list, entry, entry);
	mtx_unlock_spin(lock);
}

static struct ntb_queue_entry *
ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
{
	struct ntb_queue_entry *entry;

	mtx_lock_spin(lock);
	if (STAILQ_EMPTY(list)) {
		entry = NULL;
		goto out;
	}
	entry = STAILQ_FIRST(list);
	STAILQ_REMOVE_HEAD(list, entry);
out:
	mtx_unlock_spin(lock);

	return (entry);
}

static struct ntb_queue_entry *
ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
    struct ntb_queue_list *to)
{
	struct ntb_queue_entry *entry;

	mtx_lock_spin(lock);
	if (STAILQ_EMPTY(from)) {
		entry = NULL;
		goto out;
	}
	entry = STAILQ_FIRST(from);
	STAILQ_REMOVE_HEAD(from, entry);
	STAILQ_INSERT_TAIL(to, entry, entry);

out:
	mtx_unlock_spin(lock);
	return (entry);
}

/**
 * ntb_transport_qp_num - Query the qp number
 * @qp: NTB transport layer queue to be queried
 *
 * Query qp number of the NTB transport queue
 *
 * RETURNS: a zero based number specifying the qp number
 */
unsigned char
ntb_transport_qp_num(struct ntb_transport_qp *qp)
{

	if (qp == NULL)
		return (0);

	return (qp->qp_num);
}

/**
 * ntb_transport_max_size - Query the max payload size of a qp
 * @qp: NTB transport layer queue to be queried
 *
 * Query the maximum payload size permissible on the given qp
 *
 * RETURNS: the max payload size of a qp
 */
unsigned int
ntb_transport_max_size(struct ntb_transport_qp *qp)
{

	if (qp == NULL)
		return (0);

	return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
}

unsigned int
ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
{
	unsigned int head = qp->tx_index;
	unsigned int tail = qp->remote_rx_info->entry;

	return (tail >= head ? tail - head : qp->tx_max_entry + tail - head);
}
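
/*
 * E.g. (illustrative numbers): with tx_max_entry = 15, head (tx_index) = 3
 * and tail (the peer's consumer index) = 1, the function above reports
 * 15 + 1 - 3 = 13 free slots.
 */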

static device_method_t ntb_transport_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_transport_probe),
	DEVMETHOD(device_attach,    ntb_transport_attach),
	DEVMETHOD(device_detach,    ntb_transport_detach),
	DEVMETHOD_END
};

devclass_t ntb_transport_devclass;
static DEFINE_CLASS_0(ntb_transport, ntb_transport_driver,
    ntb_transport_methods, sizeof(struct ntb_transport_ctx));
DRIVER_MODULE(ntb_transport, ntb_hw, ntb_transport_driver,
    ntb_transport_devclass, NULL, NULL);
MODULE_DEPEND(ntb_transport, ntb, 1, 1, 1);
MODULE_VERSION(ntb_transport, 1);