/* ntb_hw.c -- FreeBSD stable/10 revision 314667 */
1/*-
2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
3 * Copyright (C) 2013 Intel Corporation
4 * Copyright (C) 2015 EMC Corporation
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29/*
30 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
31 * two or more systems using a PCI-e links, providing remote memory access.
32 *
33 * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
34 *
35 * NOTE: Much of the code in this module is shared with Linux. Any patches may
36 * be picked up and redistributed in Linux with a dual GPL/BSD license.
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c 314667 2017-03-04 13:03:31Z avg $");
41
42#include <sys/param.h>
43#include <sys/kernel.h>
44#include <sys/systm.h>
45#include <sys/bus.h>
46#include <sys/endian.h>
47#include <sys/interrupt.h>
48#include <sys/malloc.h>
49#include <sys/module.h>
50#include <sys/mutex.h>
51#include <sys/pciio.h>
52#include <sys/queue.h>
53#include <sys/rman.h>
54#include <sys/sbuf.h>
55#include <sys/sysctl.h>
56#include <vm/vm.h>
57#include <vm/pmap.h>
58#include <machine/bus.h>
59#include <machine/intr_machdep.h>
60#include <machine/pmap.h>
61#include <machine/resource.h>
62#include <dev/pci/pcireg.h>
63#include <dev/pci/pcivar.h>
64
65#include "ntb_regs.h"
66#include "../ntb.h"
67
68#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)
69
70#define NTB_HB_TIMEOUT		1 /* second */
71#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
72#define BAR_HIGH_MASK		(~((1ull << 12) - 1))
73
74#define	NTB_MSIX_VER_GUARD	0xaabbccdd
75#define	NTB_MSIX_RECEIVED	0xe0f0e0f0
76
77/*
78 * PCI constants could be somewhere more generic, but aren't defined/used in
79 * pci.c.
80 */
81#define	PCI_MSIX_ENTRY_SIZE		16
82#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
83#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
84#define	PCI_MSIX_ENTRY_DATA		8
85
/* Hardware families supported by this driver (distinct register layouts). */
enum ntb_device_type {
	NTB_XEON,	/* NTB in Intel Xeon CPUs */
	NTB_ATOM	/* NTB in Intel Atom (S1200/BWD) CPUs */
};
90
/* ntb_conn_type are hardware numbers, cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,	/* back-to-back: an NTB on both ends of the link */
	NTB_CONN_RP = 2,
};
97
/* Which side of the B2B link this device sits on. */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,	/* upstream-side device */
	NTB_DEV_DSD = 1,	/* downstream-side device */
};
102
/*
 * Logical BAR indices.  NTB_CONFIG_BAR maps the device register file (PCI
 * BAR0); the NTB_B2B_BAR_* entries are the memory windows (PCI BAR2/4, plus
 * BAR5 in split-BAR mode).  See intel_ntb_map_pci_bars().
 */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};
110
/*
 * Scratchpad register indices used by the MSI-X information exchange with
 * the peer (see intel_ntb_exchange_msix() and the NTB_MSIX_* constants).
 */
enum {
	NTB_MSIX_GUARD = 0,	/* protocol guard word (NTB_MSIX_VER_GUARD) */
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};
122
123/* Device features and workarounds */
124#define HAS_FEATURE(ntb, feature)	\
125	(((ntb)->features & (feature)) != 0)
126
/* Static description of one supported device ID; see the pci_ids[] table. */
struct ntb_hw_info {
	uint32_t		device_id;	/* PCI device+vendor ID */
	const char		*desc;		/* probe description string */
	enum ntb_device_type	type;		/* Xeon vs. Atom register layout */
	uint32_t		features;	/* NTB_* feature/errata flags */
};
133
/* State of one mapped PCI BAR. */
struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;		/* physical base address */
	caddr_t			vbase;		/* kernel virtual base address */
	vm_size_t		size;
	vm_memattr_t		map_mode;	/* current cache attribute (PAT) */

	/* Configuration register offsets */
	uint32_t		psz_off;	/* primary BAR size register */
	uint32_t		ssz_off;	/* secondary BAR size register */
	uint32_t		pbarxlat_off;	/* primary BAR translation register */
};
149
/* One allocated interrupt (legacy INTx or a single MSI-X vector). */
struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;	/* cookie returned by bus_setup_intr() */
};
155
/* Per-vector ISR context; a pointer to one of these is the ndev_vec_isr arg. */
struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;	/* vector number */
	unsigned		masked;
};
161
/* Register description that differs between Xeon and Atom flavors. */
struct ntb_reg {
	uint32_t	ntb_ctl;	/* control register offset */
	uint32_t	lnk_sta;	/* link status register offset */
	uint8_t		db_size;	/* doorbell register width, bytes */
	unsigned	mw_bar[NTB_MAX_BARS];	/* memory window -> BAR map */
};
168
/*
 * Doorbell/scratchpad register offsets; distinct instances exist for
 * addressing the local (primary) side and the peer (B2B) side.
 */
struct ntb_alt_reg {
	uint32_t	db_bell;	/* doorbell register offset */
	uint32_t	db_mask;	/* doorbell mask register offset */
	uint32_t	spad;		/* scratchpad array base offset */
};
174
/* Secondary-side BAR base / translation / limit register offsets. */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;	/* split-BAR mode only */

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;	/* split-BAR mode only */

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;	/* split-BAR mode only */
};
189
/*
 * Bus addresses used on the link between the two NTB devices in B2B
 * topology; MUST match on both sides (see the xeon_b2b sysctl overrides).
 */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;	/* BAR2, 64-bit BAR mode */
	uint64_t	bar4_addr64;	/* BAR4, 64-bit BAR mode */
	uint64_t	bar4_addr32;	/* BAR4, split-BAR mode */
	uint64_t	bar5_addr32;	/* BAR5, split-BAR mode */
};
197
/*
 * One MSI-X message: table-entry offset and data payload, exchanged with
 * the peer via scratchpads (see intel_ntb_get_msix_info()).
 */
struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};
202
/* Per-device driver state. */
struct ntb_softc {
	/* ntb.c context. Do not move! Must go first! */
	void			*ntb_store;

	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;	/* NTB_* feature/errata flags */

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	/* MSI-X message info exchanged with the peer (SB01BASE_LOCKUP). */
	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	struct callout		peer_msix_work;

	struct callout		heartbeat_timer;	/* Atom link heartbeat */
	struct callout		lr_timer;		/* Atom link recovery */

	struct ntb_vec		*msix_vec;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint32_t			msix_xlat;
	/* Memory window used for the MSI-X workaround, if any. */
	uint8_t				msix_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	/* NOTE(review): cached control/link-status values -- confirm
	 * against intel_ntb_poll_link(). */
	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;
	uint64_t			fake_db_bell;	/* NTB_SB01BASE_LOCKUP*/

	int				last_ts;	/* ticks @ last irq */

	/* Hardware-flavor-specific register descriptions. */
	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};
266
#ifdef __i386__
/*
 * i386 bus_space lacks 64-bit accessors; synthesize them from two 32-bit
 * accesses.  These are NOT atomic: the device may observe or change the
 * register between the two halves.  NOTE(review): for the read, the order
 * in which the two halves are fetched is unspecified (operands of `|`).
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

/* Write the low dword first, then the high dword. */
static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif
286
287#define intel_ntb_bar_read(SIZE, bar, offset) \
288	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
289	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
290#define intel_ntb_bar_write(SIZE, bar, offset, val) \
291	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
292	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
293#define intel_ntb_reg_read(SIZE, offset) \
294	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
295#define intel_ntb_reg_write(SIZE, offset, val) \
296	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
297#define intel_ntb_mw_read(SIZE, offset) \
298	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
299		offset)
300#define intel_ntb_mw_write(SIZE, offset, val) \
301	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
302		offset, val)
303
304static int intel_ntb_probe(device_t device);
305static int intel_ntb_attach(device_t device);
306static int intel_ntb_detach(device_t device);
307static uint64_t intel_ntb_db_valid_mask(device_t dev);
308static void intel_ntb_spad_clear(device_t dev);
309static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
310static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
311    enum ntb_width *width);
312static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
313    enum ntb_width width);
314static int intel_ntb_link_disable(device_t dev);
315static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
316static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);
317
318static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
319static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
320static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
321static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
322    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
323static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
324static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
325    vm_memattr_t);
326static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
327    const char *);
328static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
329static int map_memory_window_bar(struct ntb_softc *ntb,
330    struct ntb_pci_bar_info *bar);
331static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
332static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
333static int intel_ntb_init_isr(struct ntb_softc *ntb);
334static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
335static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
336static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
337static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
338static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
339static void ndev_vec_isr(void *arg);
340static void ndev_irq_isr(void *arg);
341static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
342static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
343static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
344static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
345static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
346static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
347static void intel_ntb_exchange_msix(void *);
348static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
349static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
350static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
351static int intel_ntb_detect_atom(struct ntb_softc *ntb);
352static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
353static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
354static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
355static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
356static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
357    enum ntb_bar regbar);
358static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
359    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
360static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
361    enum ntb_bar idx);
362static int xeon_setup_b2b_mw(struct ntb_softc *,
363    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
364static inline bool link_is_up(struct ntb_softc *ntb);
365static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
366static inline bool atom_link_is_err(struct ntb_softc *ntb);
367static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
368static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
369static void atom_link_hb(void *arg);
370static void recover_atom_link(void *arg);
371static bool intel_ntb_poll_link(struct ntb_softc *ntb);
372static void save_bar_parameters(struct ntb_pci_bar_info *bar);
373static void intel_ntb_sysctl_init(struct ntb_softc *);
374static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
375static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
376static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
377static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
378static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
379
380static unsigned g_ntb_hw_debug_level;
381TUNABLE_INT("hw.ntb.debug_level", &g_ntb_hw_debug_level);
382SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
383    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
384#define intel_ntb_printf(lvl, ...) do {				\
385	if ((lvl) <= g_ntb_hw_debug_level) {			\
386		device_printf(ntb->device, __VA_ARGS__);	\
387	}							\
388} while (0)
389
390#define	_NTB_PAT_UC	0
391#define	_NTB_PAT_WC	1
392#define	_NTB_PAT_WT	4
393#define	_NTB_PAT_WP	5
394#define	_NTB_PAT_WB	6
395#define	_NTB_PAT_UCM	7
396static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
397TUNABLE_INT("hw.ntb.default_mw_pat", &g_ntb_mw_pat);
398SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
399    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
400    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
401    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
402    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
403    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
404    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
405    "UC-: " __XSTRING(_NTB_PAT_UCM));
406
407static inline vm_memattr_t
408intel_ntb_pat_flags(void)
409{
410
411	switch (g_ntb_mw_pat) {
412	case _NTB_PAT_WC:
413		return (VM_MEMATTR_WRITE_COMBINING);
414	case _NTB_PAT_WT:
415		return (VM_MEMATTR_WRITE_THROUGH);
416	case _NTB_PAT_WP:
417		return (VM_MEMATTR_WRITE_PROTECTED);
418	case _NTB_PAT_WB:
419		return (VM_MEMATTR_WRITE_BACK);
420	case _NTB_PAT_UCM:
421		return (VM_MEMATTR_WEAK_UNCACHEABLE);
422	case _NTB_PAT_UC:
423		/* FALLTHROUGH */
424	default:
425		return (VM_MEMATTR_UNCACHEABLE);
426	}
427}
428
429/*
430 * Well, this obviously doesn't belong here, but it doesn't seem to exist
431 * anywhere better yet.
432 */
433static inline const char *
434intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
435{
436
437	switch (pat) {
438	case VM_MEMATTR_WRITE_COMBINING:
439		return ("WRITE_COMBINING");
440	case VM_MEMATTR_WRITE_THROUGH:
441		return ("WRITE_THROUGH");
442	case VM_MEMATTR_WRITE_PROTECTED:
443		return ("WRITE_PROTECTED");
444	case VM_MEMATTR_WRITE_BACK:
445		return ("WRITE_BACK");
446	case VM_MEMATTR_WEAK_UNCACHEABLE:
447		return ("UNCACHED");
448	case VM_MEMATTR_UNCACHEABLE:
449		return ("UNCACHEABLE");
450	default:
451		return ("UNKNOWN");
452	}
453}
454
455static int g_ntb_msix_idx = 1;
456TUNABLE_INT("hw.ntb.msix_mw_idx", &g_ntb_msix_idx);
457SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
458    0, "Use this memory window to access the peer MSIX message complex on "
459    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
460    "Like b2b_mw_idx, negative values index from the last available memory "
461    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");
462
463static int g_ntb_mw_idx = -1;
464TUNABLE_INT("hw.ntb.b2b_mw_idx", &g_ntb_mw_idx);
465SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
466    0, "Use this memory window to access the peer NTB registers.  A "
467    "non-negative value starts from the first MW index; a negative value "
468    "starts from the last MW index.  The default is -1, i.e., the last "
469    "available memory window.  Both sides of the NTB MUST set the same "
470    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
471
472/* Hardware owns the low 16 bits of features. */
473#define NTB_BAR_SIZE_4K		(1 << 0)
474#define NTB_SDOORBELL_LOCKUP	(1 << 1)
475#define NTB_SB01BASE_LOCKUP	(1 << 2)
476#define NTB_B2BDOORBELL_BIT14	(1 << 3)
477/* Software/configuration owns the top 16 bits. */
478#define NTB_SPLIT_BAR		(1ull << 16)
479
480#define NTB_FEATURES_STR \
481    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
482    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
483
484static struct ntb_hw_info pci_ids[] = {
485	/* XXX: PS/SS IDs left out until they are supported. */
486	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
487		NTB_ATOM, 0 },
488
489	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
490		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
491	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
492		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
493	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
494		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
495		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
496	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
497		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
498		    NTB_SB01BASE_LOCKUP },
499	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
500		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
501		    NTB_SB01BASE_LOCKUP },
502
503	{ 0x00000000, NULL, NTB_ATOM, 0 }
504};
505
506static const struct ntb_reg atom_reg = {
507	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
508	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
509	.db_size = sizeof(uint64_t),
510	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
511};
512
513static const struct ntb_alt_reg atom_pri_reg = {
514	.db_bell = ATOM_PDOORBELL_OFFSET,
515	.db_mask = ATOM_PDBMSK_OFFSET,
516	.spad = ATOM_SPAD_OFFSET,
517};
518
519static const struct ntb_alt_reg atom_b2b_reg = {
520	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
521	.spad = ATOM_B2B_SPAD_OFFSET,
522};
523
524static const struct ntb_xlat_reg atom_sec_xlat = {
525#if 0
526	/* "FIXME" says the Linux driver. */
527	.bar0_base = ATOM_SBAR0BASE_OFFSET,
528	.bar2_base = ATOM_SBAR2BASE_OFFSET,
529	.bar4_base = ATOM_SBAR4BASE_OFFSET,
530
531	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
532	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
533#endif
534
535	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
536	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
537};
538
539static const struct ntb_reg xeon_reg = {
540	.ntb_ctl = XEON_NTBCNTL_OFFSET,
541	.lnk_sta = XEON_LINK_STATUS_OFFSET,
542	.db_size = sizeof(uint16_t),
543	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
544};
545
546static const struct ntb_alt_reg xeon_pri_reg = {
547	.db_bell = XEON_PDOORBELL_OFFSET,
548	.db_mask = XEON_PDBMSK_OFFSET,
549	.spad = XEON_SPAD_OFFSET,
550};
551
552static const struct ntb_alt_reg xeon_b2b_reg = {
553	.db_bell = XEON_B2B_DOORBELL_OFFSET,
554	.spad = XEON_B2B_SPAD_OFFSET,
555};
556
557static const struct ntb_xlat_reg xeon_sec_xlat = {
558	.bar0_base = XEON_SBAR0BASE_OFFSET,
559	.bar2_base = XEON_SBAR2BASE_OFFSET,
560	.bar4_base = XEON_SBAR4BASE_OFFSET,
561	.bar5_base = XEON_SBAR5BASE_OFFSET,
562
563	.bar2_limit = XEON_SBAR2LMT_OFFSET,
564	.bar4_limit = XEON_SBAR4LMT_OFFSET,
565	.bar5_limit = XEON_SBAR5LMT_OFFSET,
566
567	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
568	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
569	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
570};
571
572static struct ntb_b2b_addr xeon_b2b_usd_addr = {
573	.bar0_addr = XEON_B2B_BAR0_ADDR,
574	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
575	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
576	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
577	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
578};
579
580static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
581	.bar0_addr = XEON_B2B_BAR0_ADDR,
582	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
583	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
584	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
585	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
586};
587
588SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
589    "B2B MW segment overrides -- MUST be the same on both sides");
590
591TUNABLE_QUAD("hw.ntb.usd_bar2_addr64", &xeon_b2b_usd_addr.bar2_addr64);
592SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
593    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
594    "hardware, use this 64-bit address on the bus between the NTB devices for "
595    "the window at BAR2, on the upstream side of the link.  MUST be the same "
596    "address on both sides.");
597TUNABLE_QUAD("hw.ntb.usd_bar4_addr64", &xeon_b2b_usd_addr.bar4_addr64);
598SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
599    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
600TUNABLE_QUAD("hw.ntb.usd_bar4_addr32", &xeon_b2b_usd_addr.bar4_addr32);
601SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
602    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
603    "(split-BAR mode).");
604TUNABLE_QUAD("hw.ntb.usd_bar5_addr32", &xeon_b2b_usd_addr.bar5_addr32);
605SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
606    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
607    "(split-BAR mode).");
608
609TUNABLE_QUAD("hw.ntb.dsd_bar2_addr64", &xeon_b2b_dsd_addr.bar2_addr64);
610SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
611    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
612    "hardware, use this 64-bit address on the bus between the NTB devices for "
613    "the window at BAR2, on the downstream side of the link.  MUST be the same"
614    " address on both sides.");
615TUNABLE_QUAD("hw.ntb.dsd_bar4_addr64", &xeon_b2b_dsd_addr.bar4_addr64);
616SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
617    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
618TUNABLE_QUAD("hw.ntb.dsd_bar4_addr32", &xeon_b2b_dsd_addr.bar4_addr32);
619SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
620    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
621    "(split-BAR mode).");
622TUNABLE_QUAD("hw.ntb.dsd_bar5_addr32", &xeon_b2b_dsd_addr.bar5_addr32);
623SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
624    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
625    "(split-BAR mode).");
626
627/*
628 * OS <-> Driver interface structures
629 */
630MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
631
632/*
633 * OS <-> Driver linkage functions
634 */
635static int
636intel_ntb_probe(device_t device)
637{
638	struct ntb_hw_info *p;
639
640	p = intel_ntb_get_device_info(pci_get_devid(device));
641	if (p == NULL)
642		return (ENXIO);
643
644	device_set_desc(device, p->desc);
645	return (0);
646}
647
/*
 * Device attach: record the hardware flavor and features, detect the
 * topology, map the BARs, program the device, and register child NTB
 * consumers.  Any failure unwinds via intel_ntb_detach().
 */
static int
intel_ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = device_get_softc(device);
	p = intel_ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	/* B2B and MSI-X memory windows start out unassigned. */
	ntb->b2b_mw_idx = B2B_MW_DISABLED;
	ntb->msix_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	callout_init(&ntb->peer_msix_work, 1);
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);

	/* Topology/feature detection must precede BAR mapping. */
	if (ntb->type == NTB_ATOM)
		error = intel_ntb_detect_atom(ntb);
	else
		error = intel_ntb_detect_xeon(ntb);
	if (error != 0)
		goto out;

	intel_ntb_detect_max_mw(ntb);

	pci_enable_busmaster(ntb->device);

	error = intel_ntb_map_pci_bars(ntb);
	if (error != 0)
		goto out;
	if (ntb->type == NTB_ATOM)
		error = intel_ntb_atom_init_dev(ntb);
	else
		error = intel_ntb_xeon_init_dev(ntb);
	if (error != 0)
		goto out;

	intel_ntb_spad_clear(device);

	intel_ntb_poll_link(ntb);

	intel_ntb_sysctl_init(ntb);

	/* Attach children to this controller */
	error = ntb_register_device(device);

out:
	if (error != 0)
		intel_ntb_detach(device);
	return (error);
}
705
/*
 * Device detach: unregister children, mask all doorbells, stop deferred
 * work, and release hardware resources.  Also used as the error-unwind
 * path of intel_ntb_attach(), so partially-initialized state must be
 * tolerated (e.g. self_reg may still be NULL).
 */
static int
intel_ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = device_get_softc(device);

	/* Detach & delete all children */
	ntb_unregister_device(device);

	if (ntb->self_reg != NULL) {
		/* Mask every valid doorbell bit on our side. */
		DB_MASK_LOCK(ntb);
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
		DB_MASK_UNLOCK(ntb);
	}
	/* Wait for any in-flight callouts to finish before teardown. */
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	callout_drain(&ntb->peer_msix_work);
	pci_disable_busmaster(ntb->device);
	if (ntb->type == NTB_XEON)
		intel_ntb_teardown_xeon(ntb);
	intel_ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);

	intel_ntb_unmap_pci_bar(ntb);

	return (0);
}
735
736/*
737 * Driver internal routines
738 */
739static inline enum ntb_bar
740intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
741{
742
743	KASSERT(mw < ntb->mw_count,
744	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
745	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
746
747	return (ntb->reg->mw_bar[mw]);
748}
749
750static inline bool
751bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
752{
753	/* XXX This assertion could be stronger. */
754	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
755	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
756}
757
758static inline void
759bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
760    uint32_t *xlat, uint32_t *lmt)
761{
762	uint32_t basev, lmtv, xlatv;
763
764	switch (bar) {
765	case NTB_B2B_BAR_1:
766		basev = ntb->xlat_reg->bar2_base;
767		lmtv = ntb->xlat_reg->bar2_limit;
768		xlatv = ntb->xlat_reg->bar2_xlat;
769		break;
770	case NTB_B2B_BAR_2:
771		basev = ntb->xlat_reg->bar4_base;
772		lmtv = ntb->xlat_reg->bar4_limit;
773		xlatv = ntb->xlat_reg->bar4_xlat;
774		break;
775	case NTB_B2B_BAR_3:
776		basev = ntb->xlat_reg->bar5_base;
777		lmtv = ntb->xlat_reg->bar5_limit;
778		xlatv = ntb->xlat_reg->bar5_xlat;
779		break;
780	default:
781		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
782		    ("bad bar"));
783		basev = lmtv = xlatv = 0;
784		break;
785	}
786
787	if (base != NULL)
788		*base = basev;
789	if (xlat != NULL)
790		*xlat = xlatv;
791	if (lmt != NULL)
792		*lmt = lmtv;
793}
794
795static int
796intel_ntb_map_pci_bars(struct ntb_softc *ntb)
797{
798	int rc;
799
800	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
801	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
802	if (rc != 0)
803		goto out;
804
805	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
806	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
807	if (rc != 0)
808		goto out;
809	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
810	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
811	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
812
813	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
814	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
815	if (rc != 0)
816		goto out;
817	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
818	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
819	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
820
821	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
822		goto out;
823
824	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
825	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
826	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
827	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
828	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
829
830out:
831	if (rc != 0)
832		device_printf(ntb->device,
833		    "unable to allocate pci resource\n");
834	return (rc);
835}
836
837static void
838print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
839    const char *kind)
840{
841
842	device_printf(ntb->device,
843	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
844	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
845	    (char *)bar->vbase + bar->size - 1,
846	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
847	    (uintmax_t)bar->size, kind);
848}
849
850static int
851map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
852{
853
854	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
855	    &bar->pci_resource_id, RF_ACTIVE);
856	if (bar->pci_resource == NULL)
857		return (ENXIO);
858
859	save_bar_parameters(bar);
860	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
861	print_map_success(ntb, bar, "mmr");
862	return (0);
863}
864
865static int
866map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
867{
868	int rc;
869	vm_memattr_t mapmode;
870	uint8_t bar_size_bits = 0;
871
872	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
873	    &bar->pci_resource_id, RF_ACTIVE);
874
875	if (bar->pci_resource == NULL)
876		return (ENXIO);
877
878	save_bar_parameters(bar);
879	/*
880	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
881	 * hardware issue. To work around this, query the size it should be
882	 * configured to by the device and modify the resource to correspond to
883	 * this new size. The BIOS on systems with this problem is required to
884	 * provide enough address space to allow the driver to make this change
885	 * safely.
886	 *
887	 * Ideally I could have just specified the size when I allocated the
888	 * resource like:
889	 *  bus_alloc_resource(ntb->device,
890	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
891	 *	1ul << bar_size_bits, RF_ACTIVE);
892	 * but the PCI driver does not honor the size in this call, so we have
893	 * to modify it after the fact.
894	 */
895	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
896		if (bar->pci_resource_id == PCIR_BAR(2))
897			bar_size_bits = pci_read_config(ntb->device,
898			    XEON_PBAR23SZ_OFFSET, 1);
899		else
900			bar_size_bits = pci_read_config(ntb->device,
901			    XEON_PBAR45SZ_OFFSET, 1);
902
903		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
904		    bar->pci_resource, bar->pbase,
905		    bar->pbase + (1ul << bar_size_bits) - 1);
906		if (rc != 0) {
907			device_printf(ntb->device,
908			    "unable to resize bar\n");
909			return (rc);
910		}
911
912		save_bar_parameters(bar);
913	}
914
915	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
916	print_map_success(ntb, bar, "mw");
917
918	/*
919	 * Optionally, mark MW BARs as anything other than UC to improve
920	 * performance.
921	 */
922	mapmode = intel_ntb_pat_flags();
923	if (mapmode == bar->map_mode)
924		return (0);
925
926	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
927	if (rc == 0) {
928		bar->map_mode = mapmode;
929		device_printf(ntb->device,
930		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
931		    "%s.\n",
932		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
933		    (char *)bar->vbase + bar->size - 1,
934		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
935		    intel_ntb_vm_memattr_to_str(mapmode));
936	} else
937		device_printf(ntb->device,
938		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
939		    "%s: %d\n",
940		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
941		    (char *)bar->vbase + bar->size - 1,
942		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
943		    intel_ntb_vm_memattr_to_str(mapmode), rc);
944		/* Proceed anyway */
945	return (0);
946}
947
948static void
949intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
950{
951	struct ntb_pci_bar_info *current_bar;
952	int i;
953
954	for (i = 0; i < NTB_MAX_BARS; i++) {
955		current_bar = &ntb->bar_info[i];
956		if (current_bar->pci_resource != NULL)
957			bus_release_resource(ntb->device, SYS_RES_MEMORY,
958			    current_bar->pci_resource_id,
959			    current_bar->pci_resource);
960	}
961}
962
963static int
964intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
965{
966	uint32_t i;
967	int rc;
968
969	for (i = 0; i < num_vectors; i++) {
970		ntb->int_info[i].rid = i + 1;
971		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
972		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
973		if (ntb->int_info[i].res == NULL) {
974			device_printf(ntb->device,
975			    "bus_alloc_resource failed\n");
976			return (ENOMEM);
977		}
978		ntb->int_info[i].tag = NULL;
979		ntb->allocated_interrupts++;
980		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
981		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
982		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
983		if (rc != 0) {
984			device_printf(ntb->device, "bus_setup_intr failed\n");
985			return (ENXIO);
986		}
987	}
988	return (0);
989}
990
991/*
992 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
993 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
994 * should be okay.  This tunable should enable us to test that hypothesis
995 * when someone gets their hands on some Xeon hardware.
996 */
997static int ntb_force_remap_mode;
998TUNABLE_INT("hw.ntb.force_remap_mode", &ntb_force_remap_mode);
999SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
1000    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
1001    " to a smaller number of ithreads, even if the desired number are "
1002    "available");
1003
1004/*
1005 * In case it is NOT ok, give consumers an abort button.
1006 */
1007static int ntb_prefer_intx;
1008TUNABLE_INT("hw.ntb.prefer_intx_to_remap", &ntb_prefer_intx);
1009SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
1010    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
1011    "than remapping MSI-X messages over available slots (match Linux driver "
1012    "behavior)");
1013
1014/*
1015 * Remap the desired number of MSI-X messages to available ithreads in a simple
1016 * round-robin fashion.
1017 */
1018static int
1019intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
1020{
1021	u_int *vectors;
1022	uint32_t i;
1023	int rc;
1024
1025	if (ntb_prefer_intx != 0)
1026		return (ENXIO);
1027
1028	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
1029
1030	for (i = 0; i < desired; i++)
1031		vectors[i] = (i % avail) + 1;
1032
1033	rc = pci_remap_msix(dev, desired, vectors);
1034	free(vectors, M_NTB);
1035	return (rc);
1036}
1037
/*
 * Negotiate and set up interrupt delivery: ideally one MSI-X vector per
 * doorbell bit, degrading through vector remapping to a single legacy
 * interrupt.  Returns 0 or an errno; on failure the msix_vec array is
 * freed here but allocated IRQ resources are left for the caller's
 * teardown path (they are tracked in ntb->allocated_interrupts).
 */
static int
intel_ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * Mask all doorbell interrupts.  (Except link events!)
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	/* Ideal case: one vector per doorbell bit. */
	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		/* pci_alloc_msix() may grant fewer vectors than requested. */
		rc = pci_alloc_msix(ntb->device, &num_vectors);

		/* Test hook: pretend we got one vector fewer than needed. */
		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		if (rc == 0 && num_vectors < desired_vectors) {
			/* Short on vectors: round-robin remap, or give up. */
			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
		/* MSI or INTx: all doorbells funnel through one vector. */
		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround does not support MSI or INTX\n");
			return (EINVAL);
		}

		ntb->db_vec_count = 1;
		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
		rc = intel_ntb_setup_legacy_interrupt(ntb);
	} else {
		/* SB01BASE_LOCKUP needs exactly one vector per non-link DB. */
		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround expects %d doorbell bits\n",
			    XEON_NONLINK_DB_MSIX_BITS);
			return (EINVAL);
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		intel_ntb_free_msix_vec(ntb);
	}

	return (rc);
}
1107
1108static int
1109intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1110{
1111	int rc;
1112
1113	ntb->int_info[0].rid = 0;
1114	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1115	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1116	if (ntb->int_info[0].res == NULL) {
1117		device_printf(ntb->device, "bus_alloc_resource failed\n");
1118		return (ENOMEM);
1119	}
1120
1121	ntb->int_info[0].tag = NULL;
1122	ntb->allocated_interrupts = 1;
1123
1124	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1125	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1126	    ntb, &ntb->int_info[0].tag);
1127	if (rc != 0) {
1128		device_printf(ntb->device, "bus_setup_intr failed\n");
1129		return (ENXIO);
1130	}
1131
1132	return (0);
1133}
1134
1135static void
1136intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
1137{
1138	struct ntb_int_info *current_int;
1139	int i;
1140
1141	for (i = 0; i < ntb->allocated_interrupts; i++) {
1142		current_int = &ntb->int_info[i];
1143		if (current_int->tag != NULL)
1144			bus_teardown_intr(ntb->device, current_int->res,
1145			    current_int->tag);
1146
1147		if (current_int->res != NULL)
1148			bus_release_resource(ntb->device, SYS_RES_IRQ,
1149			    rman_get_rid(current_int->res), current_int->res);
1150	}
1151
1152	intel_ntb_free_msix_vec(ntb);
1153	pci_release_msi(ntb->device);
1154}
1155
1156/*
1157 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1158 * out to make code clearer.
1159 */
1160static inline uint64_t
1161db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1162{
1163
1164	if (ntb->type == NTB_ATOM)
1165		return (intel_ntb_reg_read(8, regoff));
1166
1167	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1168
1169	return (intel_ntb_reg_read(2, regoff));
1170}
1171
/*
 * Checked doorbell write: asserts (INVARIANTS) that only bits within
 * db_valid_mask are set, and that the doorbell mask lock is held when
 * the target register is the mask itself, then delegates to the raw
 * width-dispatching writer.
 */
static inline void
db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	/* Catch callers setting doorbell bits this hardware lacks. */
	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	/* Mask-register updates must be serialized by DB_MASK_LOCK. */
	if (regoff == ntb->self_reg->db_mask)
		DB_MASK_ASSERT(ntb, MA_OWNED);
	db_iowrite_raw(ntb, regoff, val);
}
1185
1186static inline void
1187db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1188{
1189
1190	if (ntb->type == NTB_ATOM) {
1191		intel_ntb_reg_write(8, regoff, val);
1192		return;
1193	}
1194
1195	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1196	intel_ntb_reg_write(2, regoff, (uint16_t)val);
1197}
1198
1199static void
1200intel_ntb_db_set_mask(device_t dev, uint64_t bits)
1201{
1202	struct ntb_softc *ntb = device_get_softc(dev);
1203
1204	DB_MASK_LOCK(ntb);
1205	ntb->db_mask |= bits;
1206	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1207		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1208	DB_MASK_UNLOCK(ntb);
1209}
1210
/*
 * Unmask (enable) the given doorbell bits.  Under the SB01BASE_LOCKUP
 * errata workaround, any already-pending bits being unmasked are
 * delivered immediately as software interrupts; otherwise the hardware
 * mask register is rewritten.
 */
static void
intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t ibits;
	int i;

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	DB_MASK_LOCK(ntb);
	/* Bits that are pending, currently masked, and being unmasked now. */
	ibits = ntb->fake_db_bell & ntb->db_mask & bits;
	ntb->db_mask &= ~bits;
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/* Simulate fake interrupts if unmasked DB bits are set. */
		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
				swi_sched(ntb->int_info[i].tag, 0);
		}
	} else {
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	}
	DB_MASK_UNLOCK(ntb);
}
1237
1238static uint64_t
1239intel_ntb_db_read(device_t dev)
1240{
1241	struct ntb_softc *ntb = device_get_softc(dev);
1242
1243	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
1244		return (ntb->fake_db_bell);
1245
1246	return (db_ioread(ntb, ntb->self_reg->db_bell));
1247}
1248
1249static void
1250intel_ntb_db_clear(device_t dev, uint64_t bits)
1251{
1252	struct ntb_softc *ntb = device_get_softc(dev);
1253
1254	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1255	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1256	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1257	     (uintmax_t)ntb->db_valid_mask));
1258
1259	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1260		DB_MASK_LOCK(ntb);
1261		ntb->fake_db_bell &= ~bits;
1262		DB_MASK_UNLOCK(ntb);
1263		return;
1264	}
1265
1266	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1267}
1268
1269static inline uint64_t
1270intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1271{
1272	uint64_t shift, mask;
1273
1274	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
1275		/*
1276		 * Remap vectors in custom way to make at least first
1277		 * three doorbells to not generate stray events.
1278		 * This breaks Linux compatibility (if one existed)
1279		 * when more then one DB is used (not by if_ntb).
1280		 */
1281		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
1282			return (1 << db_vector);
1283		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
1284			return (0x7ffc);
1285	}
1286
1287	shift = ntb->db_vec_shift;
1288	mask = (1ull << shift) - 1;
1289	return (mask << (shift * db_vector));
1290}
1291
/*
 * Common interrupt body for both MSI-X and legacy paths: handle link
 * events on the link doorbell bit, maintain the emulated doorbell word
 * for the SB01BASE_LOCKUP workaround, and notify consumers of any valid
 * doorbell activity.
 */
static void
intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
{
	uint64_t vec_mask;

	/* Record interrupt time; atom_link_hb() uses this to defer polling. */
	ntb->last_ts = ticks;
	vec_mask = intel_ntb_vec_mask(ntb, vec);

	if ((vec_mask & ntb->db_link_mask) != 0) {
		if (intel_ntb_poll_link(ntb))
			ntb_link_event(ntb->device);
	}

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    (vec_mask & ntb->db_link_mask) == 0) {
		DB_MASK_LOCK(ntb);

		/* Do not report same DB events again if not cleared yet. */
		vec_mask &= ~ntb->fake_db_bell;

		/* Update our internal doorbell register. */
		ntb->fake_db_bell |= vec_mask;

		/* Do not report masked DB events. */
		vec_mask &= ~ntb->db_mask;

		DB_MASK_UNLOCK(ntb);
	}

	/* Deliver whatever survived the filtering above. */
	if ((vec_mask & ntb->db_valid_mask) != 0)
		ntb_db_event(ntb->device, vec);
}
1324
1325static void
1326ndev_vec_isr(void *arg)
1327{
1328	struct ntb_vec *nvec = arg;
1329
1330	intel_ntb_interrupt(nvec->ntb, nvec->num);
1331}
1332
/* Legacy INTx interrupt handler. */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb = arg;

	/* If we couldn't set up MSI-X, we only have the one vector. */
	intel_ntb_interrupt(ntb, 0);
}
1339
1340static int
1341intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1342{
1343	uint32_t i;
1344
1345	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1346	    M_ZERO | M_WAITOK);
1347	for (i = 0; i < num_vectors; i++) {
1348		ntb->msix_vec[i].num = i;
1349		ntb->msix_vec[i].ntb = ntb;
1350	}
1351
1352	return (0);
1353}
1354
1355static void
1356intel_ntb_free_msix_vec(struct ntb_softc *ntb)
1357{
1358
1359	if (ntb->msix_vec == NULL)
1360		return;
1361
1362	free(ntb->msix_vec, M_NTB);
1363	ntb->msix_vec = NULL;
1364}
1365
/*
 * Snapshot the local MSI-X table (address/data pair per entry) into
 * ntb->msix_data, reading the entries directly out of the mapped MSI-X
 * table BAR.  Presumably consumed by the SB01BASE_LOCKUP errata
 * workaround so the peer can trigger our vectors — confirm against the
 * msix_data consumers.
 */
static void
intel_ntb_get_msix_info(struct ntb_softc *ntb)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_msix *msix;
	uint32_t laddr, data, i, offset;

	dinfo = device_get_ivars(ntb->device);
	msix = &dinfo->cfg.msix;

	/* msix_data must have one slot per non-link doorbell vector. */
	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;

		laddr = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_LOWER_ADDR);
		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);

		/* Only the low 32 address bits are kept; they must fall in
		 * the standard Intel MSI address window. */
		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
		     MSI_INTEL_ADDR_BASE));
		ntb->msix_data[i].nmd_ofs = laddr;

		data = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_DATA);
		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);

		ntb->msix_data[i].nmd_data = data;
	}
}
1397
1398static struct ntb_hw_info *
1399intel_ntb_get_device_info(uint32_t device_id)
1400{
1401	struct ntb_hw_info *ep = pci_ids;
1402
1403	while (ep->device_id) {
1404		if (ep->device_id == device_id)
1405			return (ep);
1406		++ep;
1407	}
1408	return (NULL);
1409}
1410
1411static void
1412intel_ntb_teardown_xeon(struct ntb_softc *ntb)
1413{
1414
1415	if (ntb->reg != NULL)
1416		intel_ntb_link_disable(ntb->device);
1417}
1418
1419static void
1420intel_ntb_detect_max_mw(struct ntb_softc *ntb)
1421{
1422
1423	if (ntb->type == NTB_ATOM) {
1424		ntb->mw_count = ATOM_MW_COUNT;
1425		return;
1426	}
1427
1428	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1429		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1430	else
1431		ntb->mw_count = XEON_SNB_MW_COUNT;
1432}
1433
/*
 * Probe Xeon NTB configuration from the PPD config register: device
 * direction (USD/DSD), split-BAR mode, and connection type.  Also
 * resolves conflicts between the SB01BASE_LOCKUP and SDOORBELL_LOCKUP
 * errata workarounds.  Returns 0 or ENXIO for unsupported connection
 * types (only B2B is handled).
 */
static int
intel_ntb_detect_xeon(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
	ntb->ppd = ppd;

	/* Upstream (USD) vs. downstream (DSD) of the bridge. */
	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
		ntb->features |= NTB_SPLIT_BAR;

	/* The SB01BASE_LOCKUP workaround requires split BARs. */
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		device_printf(ntb->device,
		    "Can not apply SB01BASE_LOCKUP workaround "
		    "with split BARs disabled!\n");
		device_printf(ntb->device,
		    "Expect system hangs under heavy NTB traffic!\n");
		ntb->features &= ~NTB_SB01BASE_LOCKUP;
	}

	/*
	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
	 * errata workaround; only do one at a time.
	 */
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		ntb->features &= ~NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}
1481
1482static int
1483intel_ntb_detect_atom(struct ntb_softc *ntb)
1484{
1485	uint32_t ppd, conn_type;
1486
1487	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1488	ntb->ppd = ppd;
1489
1490	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1491		ntb->dev_type = NTB_DEV_DSD;
1492	else
1493		ntb->dev_type = NTB_DEV_USD;
1494
1495	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1496	switch (conn_type) {
1497	case NTB_CONN_B2B:
1498		ntb->conn_type = conn_type;
1499		break;
1500	default:
1501		device_printf(ntb->device, "Unsupported NTB configuration\n");
1502		return (ENXIO);
1503	}
1504	return (0);
1505}
1506
/*
 * Xeon-specific device initialization: program register layout and
 * doorbell geometry, apply whichever errata workaround is active
 * (SB01BASE_LOCKUP, SDOORBELL_LOCKUP, or B2BDOORBELL_BIT14 — mutually
 * exclusive by this point, see intel_ntb_detect_xeon()), configure the
 * B2B memory windows, enable the secondary side, and set up interrupts.
 */
static int
intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/*
		 * Reserve one memory window (index chosen by the
		 * g_ntb_msix_idx tunable, counted from the end when
		 * negative) for the MSI-X doorbell emulation.
		 */
		ntb->fake_db_bell = 0;
		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
		    g_ntb_msix_idx, ntb->msix_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		/*
		 * There is a Xeon hardware errata related to writes to SDOORBELL or
		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
		 * which may hang the system.  To workaround this, use a memory
		 * window to access the interrupt and scratch pad registers on the
		 * remote system.
		 */
		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
		    g_ntb_mw_idx, ntb->b2b_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
		/*
		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
		 * mirrored to the remote system.  Shrink the number of bits by one,
		 * since bit 14 is the last bit.
		 *
		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
		 * anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	/* Computed after the errata branches may have shrunk db_count. */
	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	/* Local translation addresses come first, then the peer's. */
	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/*
	 * Mask all doorbell interrupts.
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	rc = intel_ntb_init_isr(ntb);
	return (rc);
}
1590
/*
 * Atom-specific device initialization: program register layout and
 * doorbell geometry, apply the early-Atom MSI-X parity-error mask,
 * configure secondary-side BARs, set up interrupts, and kick off link
 * training plus the link heartbeat callout (Atom has no link interrupt,
 * see atom_link_hb()).
 */
static int
intel_ntb_atom_init_dev(struct ntb_softc *ntb)
{
	int error;

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	ntb->spad_count		 = ATOM_SPAD_COUNT;
	ntb->db_count		 = ATOM_DB_COUNT;
	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &atom_reg;
	ntb->self_reg = &atom_pri_reg;
	ntb->peer_reg = &atom_b2b_reg;
	ntb->xlat_reg = &atom_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_atom_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	error = intel_ntb_init_isr(ntb);
	if (error != 0)
		return (error);

	/* Initiate PCI-E link training */
	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	/* Start the link-status polling heartbeat immediately. */
	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);

	return (0);
}
1633
1634/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1635static void
1636configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1637{
1638
1639	if (ntb->dev_type == NTB_DEV_USD) {
1640		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1641		    XEON_B2B_BAR2_ADDR64);
1642		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1643		    XEON_B2B_BAR4_ADDR64);
1644		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1645		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1646	} else {
1647		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1648		    XEON_B2B_BAR2_ADDR64);
1649		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1650		    XEON_B2B_BAR4_ADDR64);
1651		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1652		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1653	}
1654}
1655
1656
1657/*
1658 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1659 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1660 * remains for use by a higher layer.
1661 *
1662 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1663 * MW size is sufficiently large.
1664 */
1665static unsigned int ntb_b2b_mw_share;
1666TUNABLE_INT("hw.ntb.b2b_mw_share", &ntb_b2b_mw_share);
1667SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1668    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1669    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1670    "value here.");
1671
/*
 * Copy the primary-side BAR size (log2, from PCI config) to the matching
 * secondary-side size register.  For the BAR that carries the B2B
 * register window, the secondary size is halved (shared mode) or zeroed
 * (dedicated mode) per ntb->b2b_off.
 */
static void
xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
    enum ntb_bar regbar)
{
	struct ntb_pci_bar_info *bar;
	uint8_t bar_sz;

	/* BARs 4/5 only exist separately in split-BAR mode. */
	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
		return;

	bar = &ntb->bar_info[idx];
	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
	if (idx == regbar) {
		/* Sizes are log2: decrement halves; zero disables. */
		if (ntb->b2b_off != 0)
			bar_sz--;
		else
			bar_sz = 0;
	}
	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
	/* Read back; presumably flushes the posted write — TODO confirm. */
	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
	(void)bar_sz;
}
1694
/*
 * Program a secondary BAR's base and limit registers to the same value,
 * producing a zero-length incoming window.  For the B2B register BAR the
 * base is offset by b2b_off (shared mode) or zeroed (dedicated mode).
 * Register width (4 vs. 8 bytes) follows the BAR's addressing mode.
 */
static void
xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
    enum ntb_bar idx, enum ntb_bar regbar)
{
	uint64_t reg_val;
	uint32_t base_reg, lmt_reg;

	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
	if (idx == regbar) {
		if (ntb->b2b_off)
			bar_addr += ntb->b2b_off;
		else
			bar_addr = 0;
	}

	/*
	 * Each write is followed by a read-back; presumably to flush the
	 * posted write before proceeding — TODO confirm.  Do not remove.
	 */
	if (!bar_is_64bit(ntb, idx)) {
		intel_ntb_reg_write(4, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(4, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, lmt_reg);
		(void)reg_val;
	} else {
		intel_ntb_reg_write(8, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(8, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, lmt_reg);
		(void)reg_val;
	}
}
1728
/*
 * Program the primary-side BAR translation (outgoing) register with the
 * peer's address.  Split-BAR halves (BAR4/5) use 32-bit registers; the
 * others are 64-bit.  The read-back presumably flushes the posted
 * write — TODO confirm.
 */
static void
xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
{
	struct ntb_pci_bar_info *bar;

	bar = &ntb->bar_info[idx];
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
	} else {
		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
	}
	(void)base_addr;
}
1744
/*
 * Configure the Xeon B2B memory windows: pick (or disable) the BAR that
 * carries the peer's register window, size the secondary BARs, program
 * incoming (secondary) bases/limits and outgoing (primary) translations,
 * and point the B2B translation at the peer.  `addr' holds our own
 * BAR addresses, `peer_addr' the remote side's.  Returns 0 or EIO when
 * the chosen B2B BAR is smaller than XEON_B2B_MIN_SIZE.
 */
static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		/* No SDOORBELL workaround: B2B registers stay in BAR0. */
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		/*
		 * Share half the BAR with consumers if enabled and large
		 * enough; otherwise the whole BAR goes to B2B registers.
		 */
		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
			ntb->b2b_off = bar_size >> 1;
		else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	/* Secondary BAR0 base: the address of whichever BAR holds B2B. */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		uint32_t xlat_reg, lmt_reg;
		enum ntb_bar bar_num;

		/*
		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
		 * workaround
		 */
		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
		if (bar_is_64bit(ntb, bar_num)) {
			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
			intel_ntb_reg_write(8, lmt_reg, 0);
		} else {
			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
			intel_ntb_reg_write(4, lmt_reg, 0);
		}

		ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
	}
	/* Read-backs; presumably flush the posted writes — TODO confirm. */
	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);

	/* Zero outgoing translation limits (whole bar size windows) */
	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}
1883
1884static inline bool
1885_xeon_link_is_up(struct ntb_softc *ntb)
1886{
1887
1888	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1889		return (true);
1890	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1891}
1892
1893static inline bool
1894link_is_up(struct ntb_softc *ntb)
1895{
1896
1897	if (ntb->type == NTB_XEON)
1898		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
1899		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
1900
1901	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1902	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1903}
1904
1905static inline bool
1906atom_link_is_err(struct ntb_softc *ntb)
1907{
1908	uint32_t status;
1909
1910	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1911
1912	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1913	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1914		return (true);
1915
1916	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1917	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1918}
1919
/* Atom does not have link status interrupt, poll on that platform */
static void
atom_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	sbintime_t timo, poll_ts;

	/*
	 * NOTE(review): despite the sbintime_t type, these values are in
	 * ticks (NTB_HB_TIMEOUT * hz, compared against `ticks' and passed
	 * to callout_reset()) — confirm the type is intentional.
	 */
	timo = NTB_HB_TIMEOUT * hz;
	poll_ts = ntb->last_ts + timo;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
		timo = poll_ts - ticks;
		goto out;
	}

	if (intel_ntb_poll_link(ntb))
		ntb_link_event(ntb->device);

	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
		/* Link is down with error, proceed with recovery */
		/* Hand off to recover_atom_link(); no reschedule here. */
		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
		return;
	}

out:
	/* Re-arm the heartbeat for the next poll interval. */
	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
}
1951
/*
 * Atom link recovery sequence: reset the ModPhy lanes, clear every
 * latched error status (AER, LTSSM, deskew, IBIST), then release the
 * state machine so the link can retrain.  The register values and
 * ordering follow the vendor-prescribed sequence; do not reorder.
 */
static void
atom_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
	status |= ATOM_DESKEWSTS_DBERR;
	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);

	/* Clear IBIST error overflow, write to clear */
	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	status &= ATOM_IBIST_ERR_OFLOW;
	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}
1990
1991static int
1992intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
1993    enum ntb_width width __unused)
1994{
1995	struct ntb_softc *ntb = device_get_softc(dev);
1996	uint32_t cntl;
1997
1998	intel_ntb_printf(2, "%s\n", __func__);
1999
2000	if (ntb->type == NTB_ATOM) {
2001		pci_write_config(ntb->device, NTB_PPD_OFFSET,
2002		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
2003		return (0);
2004	}
2005
2006	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2007		ntb_link_event(dev);
2008		return (0);
2009	}
2010
2011	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2012	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
2013	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
2014	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
2015	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2016		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
2017	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2018	return (0);
2019}
2020
2021static int
2022intel_ntb_link_disable(device_t dev)
2023{
2024	struct ntb_softc *ntb = device_get_softc(dev);
2025	uint32_t cntl;
2026
2027	intel_ntb_printf(2, "%s\n", __func__);
2028
2029	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2030		ntb_link_event(dev);
2031		return (0);
2032	}
2033
2034	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2035	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
2036	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
2037	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
2038		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
2039	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
2040	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2041	return (0);
2042}
2043
2044static bool
2045intel_ntb_link_enabled(device_t dev)
2046{
2047	struct ntb_softc *ntb = device_get_softc(dev);
2048	uint32_t cntl;
2049
2050	if (ntb->type == NTB_ATOM) {
2051		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
2052		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
2053	}
2054
2055	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2056		return (true);
2057
2058	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2059	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
2060}
2061
/*
 * Callout handler: attempt to recover a failed Atom link.  Restarts the
 * ModPhy/LTSSM, waits a randomized settle interval, and either resumes the
 * normal heartbeat poll (link healthy or administratively down) or
 * reschedules itself to retry.
 */
static void
recover_atom_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	unsigned speed, width, oldspeed, oldwidth;
	uint32_t status32;

	atom_perform_link_restart(ntb);

	/*
	 * There is a potential race between the 2 NTB devices recovering at
	 * the same time.  If the times are the same, the link will not recover
	 * and the driver will be stuck in this loop forever.  Add a random
	 * interval to the recovery time to prevent this race.
	 */
	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);

	/* Still erroring after restart: retry later. */
	if (atom_link_is_err(ntb))
		goto retry;

	/* Link intentionally down: nothing to verify, resume heartbeat. */
	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
		goto out;

	/*
	 * Compare the freshly-read speed/width against the cached link
	 * status; a mismatch means the link has not retrained to its
	 * previous state yet, so keep retrying.
	 */
	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	width = NTB_LNK_STA_WIDTH(status32);
	speed = status32 & NTB_LINK_SPEED_MASK;

	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
	if (oldwidth != width || oldspeed != speed)
		goto retry;

out:
	/* Recovery done (or not needed): hand back to the heartbeat poll. */
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
	    ntb);
}
2105
2106/*
2107 * Polls the HW link status register(s); returns true if something has changed.
2108 */
2109static bool
2110intel_ntb_poll_link(struct ntb_softc *ntb)
2111{
2112	uint32_t ntb_cntl;
2113	uint16_t reg_val;
2114
2115	if (ntb->type == NTB_ATOM) {
2116		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
2117		if (ntb_cntl == ntb->ntb_ctl)
2118			return (false);
2119
2120		ntb->ntb_ctl = ntb_cntl;
2121		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
2122	} else {
2123		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
2124
2125		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2126		if (reg_val == ntb->lnk_sta)
2127			return (false);
2128
2129		ntb->lnk_sta = reg_val;
2130
2131		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
2132			if (_xeon_link_is_up(ntb)) {
2133				if (!ntb->peer_msix_good) {
2134					callout_reset(&ntb->peer_msix_work, 0,
2135					    intel_ntb_exchange_msix, ntb);
2136					return (false);
2137				}
2138			} else {
2139				ntb->peer_msix_good = false;
2140				ntb->peer_msix_done = false;
2141			}
2142		}
2143	}
2144	return (true);
2145}
2146
2147static inline enum ntb_speed
2148intel_ntb_link_sta_speed(struct ntb_softc *ntb)
2149{
2150
2151	if (!link_is_up(ntb))
2152		return (NTB_SPEED_NONE);
2153	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2154}
2155
2156static inline enum ntb_width
2157intel_ntb_link_sta_width(struct ntb_softc *ntb)
2158{
2159
2160	if (!link_is_up(ntb))
2161		return (NTB_WIDTH_NONE);
2162	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2163}
2164
SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
    "Driver state, statistics, and HW registers");

/*
 * Encoding of the sysctl arg2 used by sysctl_handle_register(): the low
 * bits carry the register offset, and the top bits select the access width
 * and access method (doorbell helper or PCI config space vs. plain MMIO).
 */
#define NTB_REGSZ_MASK	(3ul << 30)	/* access-width selector */
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

#define NTB_DB_READ	(1ul << 29)	/* read via db_ioread() */
#define NTB_PCI_REG	(1ul << 28)	/* read from PCI config space */
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2177
/*
 * Build the per-device sysctl tree: link status/admin knobs at the device
 * root, plus a "debug_info" subtree exposing cached driver state and raw
 * hardware registers (the latter decoded by sysctl_handle_register() from
 * the NTB_REG_*/NTB_DB_READ/NTB_PCI_REG flags packed into arg2).
 */
static void
intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmptree;

	ctx = device_get_sysctl_ctx(ntb->device);
	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));

	/* Top-level link status and administrative control. */
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0,
	    sysctl_handle_link_status_human, "A",
	    "Link status (human readable)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
	    CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status,
	    "IU", "Link status (1=active, 0=inactive)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
	    CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin,
	    "IU", "Set/get interface status (1=UP, 0=DOWN)");

	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
	    CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers");
	tree_par = SYSCTL_CHILDREN(tree);

	/* Cached softc state. */
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
	    &ntb->ppd, 0, "Raw PPD register (cached)");

	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
#ifdef notyet
		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
		    &ntb->b2b_mw_idx, 0,
		    "Index of the MW used for B2B remote register access");
#endif
		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
		    CTLFLAG_RD, &ntb->b2b_off,
		    "If non-zero, offset of B2B register region in shared MW");
	}

	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
	    "Features/errata of this NTB device");

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
	    "NTB CTL register (cached)");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
	    "LNK STA register (cached)");

#ifdef notyet
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
	    &ntb->mw_count, 0, "MW count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
	    &ntb->spad_count, 0, "Scratchpad count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
	    &ntb->db_count, 0, "Doorbell count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
	    &ntb->db_vec_count, 0, "Doorbell vector count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
#endif

	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
	    &ntb->db_valid_mask, "Doorbell valid mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
	    &ntb->db_link_mask, "Doorbell link mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
	    &ntb->db_mask, "Doorbell mask (cached)");

	/* Raw hardware registers, read on demand by sysctl_handle_register. */
	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
	regpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
	    "NTB Control register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    0x19c, sysctl_handle_register, "IU",
	    "NTB Link Capabilities");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    0x1a0, sysctl_handle_register, "IU",
	    "NTB Link Control register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
	    sysctl_handle_register, "QU", "Doorbell mask register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
	    sysctl_handle_register, "QU", "Doorbell register");

	/* Incoming translation/limit registers; layout depends on SPLIT_BAR. */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
	    sysctl_handle_register, "QU", "Incoming LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "IU", "Incoming LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
		    sysctl_handle_register, "IU", "Incoming LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "QU", "Incoming LMT45 register");
	}

	/* Everything below is Xeon-only. */
	if (ntb->type == NTB_ATOM)
		return;

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
	    CTLFLAG_RD, NULL, "Xeon HW statistics");
	statpar = SYSCTL_CHILDREN(tmptree);
	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
	    sysctl_handle_register, "SU", "Upstream Memory Miss");

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
	    CTLFLAG_RD, NULL, "Xeon HW errors");
	errpar = SYSCTL_CHILDREN(tmptree);

	/* PCI config space registers (NTB_PCI_REG). */
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
	    sysctl_handle_register, "CU", "PPD");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
	    sysctl_handle_register, "SU", "DEVSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "LNKSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "SLNKSTS");

	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "UNCERRSTS");
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "CORERRSTS");

	/* The outgoing/secondary registers only exist on B2B connections. */
	if (ntb->conn_type != NTB_CONN_B2B)
		return;

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR4 base register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR5 base register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "QU",
		    "Secondary BAR45 base register");
	}
}
2456
2457static int
2458sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2459{
2460	struct ntb_softc *ntb = arg1;
2461	struct sbuf sb;
2462	int error;
2463
2464	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2465
2466	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2467	error = sbuf_finish(&sb);
2468	sbuf_delete(&sb);
2469
2470	if (error || !req->newptr)
2471		return (error);
2472	return (EINVAL);
2473}
2474
2475static int
2476sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
2477{
2478	struct ntb_softc *ntb = arg1;
2479	unsigned old, new;
2480	int error;
2481
2482	old = intel_ntb_link_enabled(ntb->device);
2483
2484	error = SYSCTL_OUT(req, &old, sizeof(old));
2485	if (error != 0 || req->newptr == NULL)
2486		return (error);
2487
2488	error = SYSCTL_IN(req, &new, sizeof(new));
2489	if (error != 0)
2490		return (error);
2491
2492	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
2493	    (new != 0)? "en" : "dis");
2494
2495	if (new != 0)
2496		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
2497	else
2498		error = intel_ntb_link_disable(ntb->device);
2499	return (error);
2500}
2501
2502static int
2503sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
2504{
2505	struct ntb_softc *ntb = arg1;
2506	struct sbuf sb;
2507	enum ntb_speed speed;
2508	enum ntb_width width;
2509	int error;
2510
2511	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2512
2513	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
2514		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2515		    (unsigned)speed, (unsigned)width);
2516	else
2517		sbuf_printf(&sb, "down");
2518
2519	error = sbuf_finish(&sb);
2520	sbuf_delete(&sb);
2521
2522	if (error || !req->newptr)
2523		return (error);
2524	return (EINVAL);
2525}
2526
2527static int
2528sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2529{
2530	struct ntb_softc *ntb = arg1;
2531	unsigned res;
2532	int error;
2533
2534	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
2535
2536	error = SYSCTL_OUT(req, &res, sizeof(res));
2537	if (error || !req->newptr)
2538		return (error);
2539	return (EINVAL);
2540}
2541
/*
 * Generic sysctl handler for raw register access.  arg2 packs the register
 * offset together with an access width (NTB_REG_*) and an access-method
 * flag: NTB_DB_READ (doorbell helper) or NTB_PCI_REG (PCI config space);
 * neither flag means plain MMIO.  The value is emitted big-endian so that
 * "sysctl -x" output is legible.
 */
static int
sysctl_handle_register(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb;
	const void *outp;
	uintptr_t sz;
	uint64_t umv;
	char be[sizeof(umv)];
	size_t outsz;
	uint32_t reg;
	bool db, pci;
	int error;

	ntb = arg1;
	/* Unpack offset and flags from arg2. */
	reg = arg2 & ~NTB_REGFLAGS_MASK;
	sz = arg2 & NTB_REGSZ_MASK;
	db = (arg2 & NTB_DB_READ) != 0;
	pci = (arg2 & NTB_PCI_REG) != 0;

	/* The two access-method flags are mutually exclusive. */
	KASSERT(!(db && pci), ("bogus"));

	if (db) {
		/* Doorbell registers are only ever described as 64-bit. */
		KASSERT(sz == NTB_REG_64, ("bogus"));
		umv = db_ioread(ntb, reg);
		outsz = sizeof(uint64_t);
	} else {
		switch (sz) {
		case NTB_REG_64:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 8);
			else
				umv = intel_ntb_reg_read(8, reg);
			outsz = sizeof(uint64_t);
			break;
		case NTB_REG_32:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 4);
			else
				umv = intel_ntb_reg_read(4, reg);
			outsz = sizeof(uint32_t);
			break;
		case NTB_REG_16:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 2);
			else
				umv = intel_ntb_reg_read(2, reg);
			outsz = sizeof(uint16_t);
			break;
		case NTB_REG_8:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 1);
			else
				umv = intel_ntb_reg_read(1, reg);
			outsz = sizeof(uint8_t);
			break;
		default:
			/* Unreachable: all four width encodings handled. */
			panic("bogus");
			break;
		}
	}

	/* Encode bigendian so that sysctl -x is legible. */
	be64enc(be, umv);
	/* Emit only the low outsz bytes of the big-endian encoding. */
	outp = ((char *)be) + sizeof(umv) - outsz;

	error = SYSCTL_OUT(req, outp, outsz);
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}
2612
/*
 * Map a user-visible memory window index onto the driver's internal window
 * index, skipping windows the driver reserves for itself: the non-shared
 * B2B register window (b2b_off == 0) and the MSI-X workaround window.
 */
static unsigned
intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
{

	/* Skip past the first reserved window at or below this index. */
	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) ||
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	/* If both reserved windows are at or below us, skip a second slot. */
	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) &&
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	return (uidx);
}
2627
#ifndef EARLY_AP_STARTUP
/*
 * Without EARLY_AP_STARTUP, interrupts may still be reshuffled while the
 * APs come online.  This flag is raised at SI_SUB_SMP and gates the MSI-X
 * negotiation in intel_ntb_exchange_msix().
 */
static int msix_ready;

static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif
2640
/*
 * Callout state machine that exchanges MSI-X address/data with the peer
 * through the scratchpad registers (NTB_SB01BASE_LOCKUP workaround).
 * Phases: publish our vectors and read the peer's (guarded by
 * NTB_MSIX_VER_GUARD), then handshake completion via NTB_MSIX_DONE, then
 * declare the peer good and report the link up.  Reschedules itself until
 * the exchange completes or the link drops.
 */
static void
intel_ntb_exchange_msix(void *ctx)
{
	struct ntb_softc *ntb;
	uint32_t val;
	unsigned i;

	ntb = ctx;

	/* Resume wherever the previous invocation left off. */
	if (ntb->peer_msix_good)
		goto msix_good;
	if (ntb->peer_msix_done)
		goto msix_done;

#ifndef EARLY_AP_STARTUP
	/* Block MSIX negotiation until SMP started and IRQ reshuffled. */
	if (!msix_ready)
		goto reschedule;
#endif

	/* Publish our MSI-X data/offsets into the peer's scratchpads. */
	intel_ntb_get_msix_info(ntb);
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
		    ntb->msix_data[i].nmd_data);
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
	}
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);

	/* The peer's guard value in our scratchpad means its data is valid. */
	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
	if (val != NTB_MSIX_VER_GUARD)
		goto reschedule;

	/* Capture the peer's MSI-X data/offsets from our scratchpads. */
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_data = val;
		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_ofs = val;
	}

	ntb->peer_msix_done = true;

msix_done:
	/* Tell the peer we have its data; wait for the same from it. */
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
	if (val != NTB_MSIX_RECEIVED)
		goto reschedule;

	/* Both sides are done; scratchpads can be reused now. */
	intel_ntb_spad_clear(ntb->device);
	ntb->peer_msix_good = true;
	/* Give peer time to see our NTB_MSIX_RECEIVED. */
	goto reschedule;

msix_good:
	/* Exchange complete: refresh link state and report it upstream. */
	intel_ntb_poll_link(ntb);
	ntb_link_event(ntb->device);
	return;

reschedule:
	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (_xeon_link_is_up(ntb)) {
		/* Retry; back off a little once the exchange is complete. */
		callout_reset(&ntb->peer_msix_work,
		    hz * (ntb->peer_msix_good ? 2 : 1) / 100,
		    intel_ntb_exchange_msix, ntb);
	} else
		intel_ntb_spad_clear(ntb->device);
}
2710
2711/*
2712 * Public API to the rest of the OS
2713 */
2714
/* Return the number of scratchpad registers this device exposes. */
static uint8_t
intel_ntb_spad_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->spad_count);
}
2722
2723static uint8_t
2724intel_ntb_mw_count(device_t dev)
2725{
2726	struct ntb_softc *ntb = device_get_softc(dev);
2727	uint8_t res;
2728
2729	res = ntb->mw_count;
2730	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2731		res--;
2732	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
2733		res--;
2734	return (res);
2735}
2736
2737static int
2738intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
2739{
2740	struct ntb_softc *ntb = device_get_softc(dev);
2741
2742	if (idx >= ntb->spad_count)
2743		return (EINVAL);
2744
2745	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2746
2747	return (0);
2748}
2749
2750/*
2751 * Zeros the local scratchpad.
2752 */
2753static void
2754intel_ntb_spad_clear(device_t dev)
2755{
2756	struct ntb_softc *ntb = device_get_softc(dev);
2757	unsigned i;
2758
2759	for (i = 0; i < ntb->spad_count; i++)
2760		intel_ntb_spad_write(dev, i, 0);
2761}
2762
2763static int
2764intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2765{
2766	struct ntb_softc *ntb = device_get_softc(dev);
2767
2768	if (idx >= ntb->spad_count)
2769		return (EINVAL);
2770
2771	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2772
2773	return (0);
2774}
2775
2776static int
2777intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
2778{
2779	struct ntb_softc *ntb = device_get_softc(dev);
2780
2781	if (idx >= ntb->spad_count)
2782		return (EINVAL);
2783
2784	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2785		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2786	else
2787		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2788
2789	return (0);
2790}
2791
2792static int
2793intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
2794{
2795	struct ntb_softc *ntb = device_get_softc(dev);
2796
2797	if (idx >= ntb->spad_count)
2798		return (EINVAL);
2799
2800	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
2801		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2802	else
2803		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2804
2805	return (0);
2806}
2807
2808static int
2809intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
2810    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
2811    bus_addr_t *plimit)
2812{
2813	struct ntb_softc *ntb = device_get_softc(dev);
2814	struct ntb_pci_bar_info *bar;
2815	bus_addr_t limit;
2816	size_t bar_b2b_off;
2817	enum ntb_bar bar_num;
2818
2819	if (mw_idx >= intel_ntb_mw_count(dev))
2820		return (EINVAL);
2821	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
2822
2823	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
2824	bar = &ntb->bar_info[bar_num];
2825	bar_b2b_off = 0;
2826	if (mw_idx == ntb->b2b_mw_idx) {
2827		KASSERT(ntb->b2b_off != 0,
2828		    ("user shouldn't get non-shared b2b mw"));
2829		bar_b2b_off = ntb->b2b_off;
2830	}
2831
2832	if (bar_is_64bit(ntb, bar_num))
2833		limit = BUS_SPACE_MAXADDR;
2834	else
2835		limit = BUS_SPACE_MAXADDR_32BIT;
2836
2837	if (base != NULL)
2838		*base = bar->pbase + bar_b2b_off;
2839	if (vbase != NULL)
2840		*vbase = bar->vbase + bar_b2b_off;
2841	if (size != NULL)
2842		*size = bar->size - bar_b2b_off;
2843	if (align != NULL)
2844		*align = bar->size;
2845	if (align_size != NULL)
2846		*align_size = 1;
2847	if (plimit != NULL)
2848		*plimit = limit;
2849	return (0);
2850}
2851
/*
 * Program memory window idx so that incoming accesses translate to local
 * bus address addr, limited to size bytes.  addr must be aligned to the
 * BAR size and size must fit within the window.  Each translation and
 * limit write is read back and verified; on mismatch the programming is
 * rolled back and EIO returned.  A zero limit leaves the full window
 * (limit register disabled).  Returns 0, EINVAL, ERANGE, or EIO.
 */
static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	/* The shared B2B window's register region is not usable. */
	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* Hardware requires that addr is aligned to bar size */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	/* limit == 0 disables the limit check (full window). */
	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			/* Roll back the partial programming. */
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			/* Roll back both the limit and the translation. */
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */

		/* Both ends of the mapping must be 32-bit addressable. */
		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			/* Roll back the partial programming. */
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			/* Roll back both the limit and the translation. */
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}
2939
2940static int
2941intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
2942{
2943
2944	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
2945}
2946
2947static int
2948intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
2949{
2950	struct ntb_softc *ntb = device_get_softc(dev);
2951	struct ntb_pci_bar_info *bar;
2952
2953	if (idx >= intel_ntb_mw_count(dev))
2954		return (EINVAL);
2955	idx = intel_ntb_user_mw_to_idx(ntb, idx);
2956
2957	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
2958	*mode = bar->map_mode;
2959	return (0);
2960}
2961
2962static int
2963intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
2964{
2965	struct ntb_softc *ntb = device_get_softc(dev);
2966
2967	if (idx >= intel_ntb_mw_count(dev))
2968		return (EINVAL);
2969
2970	idx = intel_ntb_user_mw_to_idx(ntb, idx);
2971	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
2972}
2973
2974static int
2975intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
2976{
2977	struct ntb_pci_bar_info *bar;
2978	int rc;
2979
2980	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
2981	if (bar->map_mode == mode)
2982		return (0);
2983
2984	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
2985	if (rc == 0)
2986		bar->map_mode = mode;
2987
2988	return (rc);
2989}
2990
2991static void
2992intel_ntb_peer_db_set(device_t dev, uint64_t bit)
2993{
2994	struct ntb_softc *ntb = device_get_softc(dev);
2995
2996	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
2997		struct ntb_pci_bar_info *lapic;
2998		unsigned i;
2999
3000		lapic = ntb->peer_lapic_bar;
3001
3002		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
3003			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
3004				bus_space_write_4(lapic->pci_bus_tag,
3005				    lapic->pci_bus_handle,
3006				    ntb->peer_msix_data[i].nmd_ofs,
3007				    ntb->peer_msix_data[i].nmd_data);
3008		}
3009		return;
3010	}
3011
3012	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3013		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
3014		return;
3015	}
3016
3017	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
3018}
3019
3020static int
3021intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
3022{
3023	struct ntb_softc *ntb = device_get_softc(dev);
3024	struct ntb_pci_bar_info *bar;
3025	uint64_t regoff;
3026
3027	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
3028
3029	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
3030		bar = &ntb->bar_info[NTB_CONFIG_BAR];
3031		regoff = ntb->peer_reg->db_bell;
3032	} else {
3033		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
3034		    ("invalid b2b idx"));
3035
3036		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
3037		regoff = XEON_PDOORBELL_OFFSET;
3038	}
3039	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
3040
3041	/* HACK: Specific to current x86 bus implementation. */
3042	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
3043	*db_size = ntb->reg->db_size;
3044	return (0);
3045}
3046
3047static uint64_t
3048intel_ntb_db_valid_mask(device_t dev)
3049{
3050	struct ntb_softc *ntb = device_get_softc(dev);
3051
3052	return (ntb->db_valid_mask);
3053}
3054
3055static int
3056intel_ntb_db_vector_count(device_t dev)
3057{
3058	struct ntb_softc *ntb = device_get_softc(dev);
3059
3060	return (ntb->db_vec_count);
3061}
3062
3063static uint64_t
3064intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
3065{
3066	struct ntb_softc *ntb = device_get_softc(dev);
3067
3068	if (vector > ntb->db_vec_count)
3069		return (0);
3070	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
3071}
3072
3073static bool
3074intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
3075{
3076	struct ntb_softc *ntb = device_get_softc(dev);
3077
3078	if (speed != NULL)
3079		*speed = intel_ntb_link_sta_speed(ntb);
3080	if (width != NULL)
3081		*width = intel_ntb_link_sta_width(ntb);
3082	return (link_is_up(ntb));
3083}
3084
3085static void
3086save_bar_parameters(struct ntb_pci_bar_info *bar)
3087{
3088
3089	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
3090	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
3091	bar->pbase = rman_get_start(bar->pci_resource);
3092	bar->size = rman_get_size(bar->pci_resource);
3093	bar->vbase = rman_get_virtual(bar->pci_resource);
3094}
3095
/*
 * newbus method table: wires the Intel NTB implementation into the
 * generic device and NTB kernel interfaces.
 */
static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* NTB interface */
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};
3129
/* Register the driver on the PCI bus and declare module metadata. */
static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_intel, 1);
3135