1/*-
2 * Copyright (c) 2015-2016 Svatopluk Kraus
3 * Copyright (c) 2015-2016 Michal Meloun
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31/*
32 *	New-style Interrupt Framework
33 *
 *  TODO: - add support for disconnected PICs.
 *        - support enabling IPIs (PPIs) on other CPUs that are already
 *          started.
 *        - complete the support for removable PICs.
37 */
38
39#include "opt_ddb.h"
40#include "opt_hwpmc_hooks.h"
41#include "opt_iommu.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/syslog.h>
49#include <sys/malloc.h>
50#include <sys/proc.h>
51#include <sys/queue.h>
52#include <sys/bus.h>
53#include <sys/interrupt.h>
54#include <sys/taskqueue.h>
55#include <sys/tree.h>
56#include <sys/conf.h>
57#include <sys/cpuset.h>
58#include <sys/rman.h>
59#include <sys/sched.h>
60#include <sys/smp.h>
61#include <sys/sysctl.h>
62#include <sys/vmmeter.h>
63#ifdef HWPMC_HOOKS
64#include <sys/pmckern.h>
65#endif
66
67#include <machine/atomic.h>
68#include <machine/intr.h>
69#include <machine/cpu.h>
70#include <machine/smp.h>
71#include <machine/stdarg.h>
72
73#ifdef DDB
74#include <ddb/ddb.h>
75#endif
76
77#ifdef IOMMU
78#include <dev/iommu/iommu_msi.h>
79#endif
80
81#include "pic_if.h"
82#include "msi_if.h"
83
/*
 * Size in bytes of one slot of the intrnames table; intrcnt_setname()
 * formats at most INTRNAME_LEN - 1 characters plus the terminating NUL.
 */
#define	INTRNAME_LEN	(2*MAXCOMLEN + 1)

/*
 * Debug trace helper.  With DEBUG defined it prints the name of the calling
 * function followed by the formatted message; otherwise it expands to
 * nothing, so its arguments are not evaluated.
 */
#ifdef DEBUG
#define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
    printf(fmt,##args); } while (0)
#else
#define debugf(fmt, args...)
#endif
92
/* Malloc type used for the allocations made in this file. */
MALLOC_DECLARE(M_INTRNG);
MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");

/* Main interrupt handler called from assembler -> 'hidden' for C code. */
void intr_irq_handler(struct trapframe *tf);

/*
 * Root interrupt controller stuff.  These four variables are assigned
 * together by intr_pic_claim_root(); intr_irq_handler() calls
 * irq_root_filter with irq_root_arg on every interrupt.
 */
device_t intr_irq_root_dev;
static intr_irq_filter_t *irq_root_filter;
static void *irq_root_arg;
static u_int irq_root_ipicount;
104
/*
 * A child handler attached to a parent interrupt controller by
 * intr_pic_add_handler().  intr_child_irq_handler() forwards a parent
 * interrupt number to pc_filter when it lies in the range
 * [pc_start, pc_start + pc_length).
 */
struct intr_pic_child {
	SLIST_ENTRY(intr_pic_child)	 pc_next;	/* link in pic_children */
	struct intr_pic			*pc_pic;	/* the child controller */
	intr_child_irq_filter_t		*pc_filter;	/* called for claimed irqs */
	void				*pc_filter_arg;	/* first argument of pc_filter */
	uintptr_t			 pc_start;	/* first claimed irq */
	uintptr_t			 pc_length;	/* number of claimed irqs */
};
113
/*
 * Interrupt controller definition.  Instances are created by pic_create()
 * and kept on the global pic_list.
 */
struct intr_pic {
	SLIST_ENTRY(intr_pic)	pic_next;	/* link in pic_list */
	intptr_t		pic_xref;	/* hardware identification */
	device_t		pic_dev;	/* device of the controller */
/* Only one of FLAG_PIC or FLAG_MSI may be set */
#define	FLAG_PIC	(1 << 0)
#define	FLAG_MSI	(1 << 1)
#define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
	u_int			pic_flags;	/* FLAG_* controller type */
	struct mtx		pic_child_lock;	/* spin lock for pic_children */
	SLIST_HEAD(, intr_pic_child) pic_children; /* attached child handlers */
};
127
/* List of all created controllers; accessed with pic_list_lock held. */
static struct mtx pic_list_lock;
static SLIST_HEAD(, intr_pic) pic_list;

static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);

/*
 * Interrupt source definition.  irq_sources is a table of intr_nirq
 * pointers indexed by interrupt number; irq_next_free is where
 * isrc_alloc_irq() starts its search for a free slot.  The allocation
 * helpers assert that isrc_table_lock is held.
 */
static struct mtx isrc_table_lock;
static struct intr_irqsrc **irq_sources;
u_int irq_next_free;

/*
 * While false, intr_isrc_assign_cpu() only records the requested CPU and
 * does not ask the PIC to bind the interrupt.
 */
#ifdef SMP
#ifdef EARLY_AP_STARTUP
static bool irq_assign_cpu = true;
#else
static bool irq_assign_cpu = false;
#endif
#endif

/* Size of the irq_sources table; exported read-only as machdep.nirq. */
u_int intr_nirq = NIRQ;
SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
    "Number of IRQs");

/*
 * Data for MI statistics reporting.  intrcnt holds the counters and
 * intrnames the matching INTRNAME_LEN-sized name slots; sintrcnt and
 * sintrnames are their sizes in bytes.  intrcnt_index is the next counter
 * slot that has not been handed out yet.
 */
u_long *intrcnt;
char *intrnames;
size_t sintrcnt;
size_t sintrnames;
static u_int intrcnt_index;

/* Helpers for the resource id -> interrupt source mapping. */
static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
static struct intr_map_data * intr_map_get_map_data(u_int res_id);
static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
    struct intr_map_data **data);
162
163/*
164 *  Interrupt framework initialization routine.
165 */
166static void
167intr_irq_init(void *dummy __unused)
168{
169	u_int intrcnt_count;
170
171	SLIST_INIT(&pic_list);
172	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
173
174	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
175
176	/*
177	 * - 2 counters for each I/O interrupt.
178	 * - MAXCPU counters for each IPI counters for SMP.
179	 */
180	intrcnt_count = intr_nirq * 2;
181#ifdef SMP
182	intrcnt_count += INTR_IPI_COUNT * MAXCPU;
183#endif
184
185	intrcnt = mallocarray(intrcnt_count, sizeof(u_long), M_INTRNG,
186	    M_WAITOK | M_ZERO);
187	intrnames = mallocarray(intrcnt_count, INTRNAME_LEN, M_INTRNG,
188	    M_WAITOK | M_ZERO);
189	sintrcnt = intrcnt_count * sizeof(u_long);
190	sintrnames = intrcnt_count * INTRNAME_LEN;
191	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
192	    M_INTRNG, M_WAITOK | M_ZERO);
193}
194SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
195
196static void
197intrcnt_setname(const char *name, int index)
198{
199
200	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
201	    INTRNAME_LEN - 1, name);
202}
203
204/*
205 *  Update name for interrupt source with interrupt event.
206 */
207static void
208intrcnt_updatename(struct intr_irqsrc *isrc)
209{
210
211	/* QQQ: What about stray counter name? */
212	mtx_assert(&isrc_table_lock, MA_OWNED);
213	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
214}
215
216/*
217 *  Virtualization for interrupt source interrupt counter increment.
218 */
219static inline void
220isrc_increment_count(struct intr_irqsrc *isrc)
221{
222
223	if (isrc->isrc_flags & INTR_ISRCF_PPI)
224		atomic_add_long(&isrc->isrc_count[0], 1);
225	else
226		isrc->isrc_count[0]++;
227}
228
229/*
230 *  Virtualization for interrupt source interrupt stray counter increment.
231 */
232static inline void
233isrc_increment_straycount(struct intr_irqsrc *isrc)
234{
235
236	isrc->isrc_count[1]++;
237}
238
239/*
240 *  Virtualization for interrupt source interrupt name update.
241 */
242static void
243isrc_update_name(struct intr_irqsrc *isrc, const char *name)
244{
245	char str[INTRNAME_LEN];
246
247	mtx_assert(&isrc_table_lock, MA_OWNED);
248
249	if (name != NULL) {
250		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
251		intrcnt_setname(str, isrc->isrc_index);
252		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
253		    name);
254		intrcnt_setname(str, isrc->isrc_index + 1);
255	} else {
256		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
257		intrcnt_setname(str, isrc->isrc_index);
258		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
259		intrcnt_setname(str, isrc->isrc_index + 1);
260	}
261}
262
263/*
264 *  Virtualization for interrupt source interrupt counters setup.
265 */
266static void
267isrc_setup_counters(struct intr_irqsrc *isrc)
268{
269	u_int index;
270
271	/*
272	 *  XXX - it does not work well with removable controllers and
273	 *        interrupt sources !!!
274	 */
275	index = atomic_fetchadd_int(&intrcnt_index, 2);
276	isrc->isrc_index = index;
277	isrc->isrc_count = &intrcnt[index];
278	isrc_update_name(isrc, NULL);
279}
280
/*
 *  Give back the counters of an interrupt source.  Releasing is not
 *  supported: any call ends in a panic.
 */
static void
isrc_release_counters(struct intr_irqsrc *isrc)
{

	panic("%s: not implemented", __func__);
}
290
#ifdef SMP
/*
 *  Reserve a run of MAXCPU consecutive counters for one IPI and name them
 *  "cpu<N>:<name>", one per possible CPU.
 *
 *  Returns a pointer to the first counter of the run.
 */
u_long *
intr_ipi_setup_counters(const char *name)
{
	u_int index, i;
	char str[INTRNAME_LEN];

	index = atomic_fetchadd_int(&intrcnt_index, MAXCPU);
	for (i = 0; i < MAXCPU; i++) {
		/* 'i' is unsigned, so it is printed with %u. */
		snprintf(str, sizeof(str), "cpu%u:%s", i, name);
		intrcnt_setname(str, index + i);
	}
	return (&intrcnt[index]);
}
#endif
309
310/*
311 *  Main interrupt dispatch handler. It's called straight
312 *  from the assembler, where CPU interrupt is served.
313 */
314void
315intr_irq_handler(struct trapframe *tf)
316{
317	struct trapframe * oldframe;
318	struct thread * td;
319
320	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
321
322	VM_CNT_INC(v_intr);
323	critical_enter();
324	td = curthread;
325	oldframe = td->td_intr_frame;
326	td->td_intr_frame = tf;
327	irq_root_filter(irq_root_arg);
328	td->td_intr_frame = oldframe;
329	critical_exit();
330#ifdef HWPMC_HOOKS
331	if (pmc_hook && TRAPF_USERMODE(tf) &&
332	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
333		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
334#endif
335}
336
337int
338intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
339{
340	struct intr_pic_child *child;
341	bool found;
342
343	found = false;
344	mtx_lock_spin(&parent->pic_child_lock);
345	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
346		if (child->pc_start <= irq &&
347		    irq < (child->pc_start + child->pc_length)) {
348			found = true;
349			break;
350		}
351	}
352	mtx_unlock_spin(&parent->pic_child_lock);
353
354	if (found)
355		return (child->pc_filter(child->pc_filter_arg, irq));
356
357	return (FILTER_STRAY);
358}
359
/*
 *  interrupt controller dispatch function for interrupts. It should
 *  be called straight from the interrupt controller, when associated interrupt
 *  source is learned.
 *
 *  The interrupt is counted first.  Returns 0 when it was handled; otherwise
 *  the stray counter of the source is incremented and EINVAL is returned.
 */
int
intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
{

	KASSERT(isrc != NULL, ("%s: no source", __func__));

	isrc_increment_count(isrc);

#ifdef INTR_SOLO
	/*
	 * A solo filter takes precedence: because of the 'else' below, the
	 * interrupt event is not consulted at all when a filter is installed.
	 */
	if (isrc->isrc_filter != NULL) {
		int error;
		error = isrc->isrc_filter(isrc->isrc_arg, tf);
		PIC_POST_FILTER(isrc->isrc_dev, isrc);
		if (error == FILTER_HANDLED)
			return (0);
	} else
#endif
	/* Standard path: hand the interrupt to the MI interrupt event. */
	if (isrc->isrc_event != NULL) {
		if (intr_event_handle(isrc->isrc_event, tf) == 0)
			return (0);
	}

	/* Nobody handled it (or there is neither filter nor event). */
	isrc_increment_straycount(isrc);
	return (EINVAL);
}
390
391/*
392 *  Alloc unique interrupt number (resource handle) for interrupt source.
393 *
394 *  There could be various strategies how to allocate free interrupt number
395 *  (resource handle) for new interrupt source.
396 *
397 *  1. Handles are always allocated forward, so handles are not recycled
398 *     immediately. However, if only one free handle left which is reused
399 *     constantly...
400 */
401static inline int
402isrc_alloc_irq(struct intr_irqsrc *isrc)
403{
404	u_int maxirqs, irq;
405
406	mtx_assert(&isrc_table_lock, MA_OWNED);
407
408	maxirqs = intr_nirq;
409	if (irq_next_free >= maxirqs)
410		return (ENOSPC);
411
412	for (irq = irq_next_free; irq < maxirqs; irq++) {
413		if (irq_sources[irq] == NULL)
414			goto found;
415	}
416	for (irq = 0; irq < irq_next_free; irq++) {
417		if (irq_sources[irq] == NULL)
418			goto found;
419	}
420
421	irq_next_free = maxirqs;
422	return (ENOSPC);
423
424found:
425	isrc->isrc_irq = irq;
426	irq_sources[irq] = isrc;
427
428	irq_next_free = irq + 1;
429	if (irq_next_free >= maxirqs)
430		irq_next_free = 0;
431	return (0);
432}
433
434/*
435 *  Free unique interrupt number (resource handle) from interrupt source.
436 */
437static inline int
438isrc_free_irq(struct intr_irqsrc *isrc)
439{
440
441	mtx_assert(&isrc_table_lock, MA_OWNED);
442
443	if (isrc->isrc_irq >= intr_nirq)
444		return (EINVAL);
445	if (irq_sources[isrc->isrc_irq] != isrc)
446		return (EINVAL);
447
448	irq_sources[isrc->isrc_irq] = NULL;
449	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
450	return (0);
451}
452
453/*
454 *  Initialize interrupt source and register it into global interrupt table.
455 */
456int
457intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
458    const char *fmt, ...)
459{
460	int error;
461	va_list ap;
462
463	bzero(isrc, sizeof(struct intr_irqsrc));
464	isrc->isrc_dev = dev;
465	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
466	isrc->isrc_flags = flags;
467
468	va_start(ap, fmt);
469	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
470	va_end(ap);
471
472	mtx_lock(&isrc_table_lock);
473	error = isrc_alloc_irq(isrc);
474	if (error != 0) {
475		mtx_unlock(&isrc_table_lock);
476		return (error);
477	}
478	/*
479	 * Setup interrupt counters, but not for IPI sources. Those are setup
480	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
481	 * our counter pool.
482	 */
483	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
484		isrc_setup_counters(isrc);
485	mtx_unlock(&isrc_table_lock);
486	return (0);
487}
488
489/*
490 *  Deregister interrupt source from global interrupt table.
491 */
492int
493intr_isrc_deregister(struct intr_irqsrc *isrc)
494{
495	int error;
496
497	mtx_lock(&isrc_table_lock);
498	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
499		isrc_release_counters(isrc);
500	error = isrc_free_irq(isrc);
501	mtx_unlock(&isrc_table_lock);
502	return (error);
503}
504
#ifdef SMP
/*
 *  Helper for a PIC to decide whether an ISRC should be initialized on the
 *  given cpu.
 *
 *  Only PPI and IPI sources that have at least one handler qualify.  For a
 *  source with INTR_ISRCF_BOUND set, the answer is whether 'cpu' is in
 *  isrc_cpu.  An unbound source is initialized on every cpu and 'cpu' is
 *  added to isrc_cpu, which keeps isrc_cpu correct in both cases.
 */
bool
intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
{
	const u_int percpu_kinds = INTR_ISRCF_PPI | INTR_ISRCF_IPI;

	if (isrc->isrc_handlers == 0 ||
	    (isrc->isrc_flags & percpu_kinds) == 0)
		return (false);

	if ((isrc->isrc_flags & INTR_ISRCF_BOUND) != 0)
		return (CPU_ISSET(cpu, &isrc->isrc_cpu));

	CPU_SET(cpu, &isrc->isrc_cpu);
	return (true);
}
#endif
530
#ifdef INTR_SOLO
/*
 *  Install a solo filter on an interrupt source.
 *
 *  Returns EINVAL for a NULL filter and EBUSY when the source already has
 *  a filter or an interrupt event.  On success the source itself is
 *  returned through 'cookiep'.
 */
static int
iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
    intr_irq_filter_t *filter, void *arg, void **cookiep)
{
	int error;

	if (filter == NULL)
		return (EINVAL);

	mtx_lock(&isrc_table_lock);
	/*
	 * The two ways of handling an interrupt source must not be mixed,
	 * so the filter goes in only while the source has neither.
	 */
	if (isrc->isrc_filter == NULL && isrc->isrc_event == NULL) {
		isrc->isrc_filter = filter;
		isrc->isrc_arg = arg;
		isrc_update_name(isrc, name);
		error = 0;
	} else
		error = EBUSY;
	mtx_unlock(&isrc_table_lock);

	if (error == 0)
		*cookiep = isrc;
	return (error);
}
#endif
561
562/*
563 *  Interrupt source pre_ithread method for MI interrupt framework.
564 */
565static void
566intr_isrc_pre_ithread(void *arg)
567{
568	struct intr_irqsrc *isrc = arg;
569
570	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
571}
572
573/*
574 *  Interrupt source post_ithread method for MI interrupt framework.
575 */
576static void
577intr_isrc_post_ithread(void *arg)
578{
579	struct intr_irqsrc *isrc = arg;
580
581	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
582}
583
584/*
585 *  Interrupt source post_filter method for MI interrupt framework.
586 */
587static void
588intr_isrc_post_filter(void *arg)
589{
590	struct intr_irqsrc *isrc = arg;
591
592	PIC_POST_FILTER(isrc->isrc_dev, isrc);
593}
594
595/*
596 *  Interrupt source assign_cpu method for MI interrupt framework.
597 */
598static int
599intr_isrc_assign_cpu(void *arg, int cpu)
600{
601#ifdef SMP
602	struct intr_irqsrc *isrc = arg;
603	int error;
604
605	mtx_lock(&isrc_table_lock);
606	if (cpu == NOCPU) {
607		CPU_ZERO(&isrc->isrc_cpu);
608		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
609	} else {
610		CPU_SETOF(cpu, &isrc->isrc_cpu);
611		isrc->isrc_flags |= INTR_ISRCF_BOUND;
612	}
613
614	/*
615	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
616	 * re-balance it to another CPU or enable it on more CPUs. However,
617	 * PIC is expected to change isrc_cpu appropriately to keep us well
618	 * informed if the call is successful.
619	 */
620	if (irq_assign_cpu) {
621		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
622		if (error) {
623			CPU_ZERO(&isrc->isrc_cpu);
624			mtx_unlock(&isrc_table_lock);
625			return (error);
626		}
627	}
628	mtx_unlock(&isrc_table_lock);
629	return (0);
630#else
631	return (EOPNOTSUPP);
632#endif
633}
634
635/*
636 *  Create interrupt event for interrupt source.
637 */
638static int
639isrc_event_create(struct intr_irqsrc *isrc)
640{
641	struct intr_event *ie;
642	int error;
643
644	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
645	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
646	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
647	if (error)
648		return (error);
649
650	mtx_lock(&isrc_table_lock);
651	/*
652	 * Make sure that we do not mix the two ways
653	 * how we handle interrupt sources. Let contested event wins.
654	 */
655#ifdef INTR_SOLO
656	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
657#else
658	if (isrc->isrc_event != NULL) {
659#endif
660		mtx_unlock(&isrc_table_lock);
661		intr_event_destroy(ie);
662		return (isrc->isrc_event != NULL ? EBUSY : 0);
663	}
664	isrc->isrc_event = ie;
665	mtx_unlock(&isrc_table_lock);
666
667	return (0);
668}
#ifdef notyet
/*
 *  Detach the interrupt event from an interrupt source and destroy it.
 *  The event is unhooked under the table lock and destroyed after the
 *  lock is dropped.
 */
static void
isrc_event_destroy(struct intr_irqsrc *isrc)
{
	struct intr_event *old;

	mtx_lock(&isrc_table_lock);
	old = isrc->isrc_event;
	isrc->isrc_event = NULL;
	mtx_unlock(&isrc_table_lock);

	if (old == NULL)
		return;
	intr_event_destroy(old);
}
#endif
687/*
688 *  Add handler to interrupt source.
689 */
690static int
691isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
692    driver_filter_t filter, driver_intr_t handler, void *arg,
693    enum intr_type flags, void **cookiep)
694{
695	int error;
696
697	if (isrc->isrc_event == NULL) {
698		error = isrc_event_create(isrc);
699		if (error)
700			return (error);
701	}
702
703	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
704	    arg, intr_priority(flags), flags, cookiep);
705	if (error == 0) {
706		mtx_lock(&isrc_table_lock);
707		intrcnt_updatename(isrc);
708		mtx_unlock(&isrc_table_lock);
709	}
710
711	return (error);
712}
713
714/*
715 *  Lookup interrupt controller locked.
716 */
717static inline struct intr_pic *
718pic_lookup_locked(device_t dev, intptr_t xref, int flags)
719{
720	struct intr_pic *pic;
721
722	mtx_assert(&pic_list_lock, MA_OWNED);
723
724	if (dev == NULL && xref == 0)
725		return (NULL);
726
727	/* Note that pic->pic_dev is never NULL on registered PIC. */
728	SLIST_FOREACH(pic, &pic_list, pic_next) {
729		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
730		    (flags & FLAG_TYPE_MASK))
731			continue;
732
733		if (dev == NULL) {
734			if (xref == pic->pic_xref)
735				return (pic);
736		} else if (xref == 0 || pic->pic_xref == 0) {
737			if (dev == pic->pic_dev)
738				return (pic);
739		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
740				return (pic);
741	}
742	return (NULL);
743}
744
745/*
746 *  Lookup interrupt controller.
747 */
748static struct intr_pic *
749pic_lookup(device_t dev, intptr_t xref, int flags)
750{
751	struct intr_pic *pic;
752
753	mtx_lock(&pic_list_lock);
754	pic = pic_lookup_locked(dev, xref, flags);
755	mtx_unlock(&pic_list_lock);
756	return (pic);
757}
758
759/*
760 *  Create interrupt controller.
761 */
762static struct intr_pic *
763pic_create(device_t dev, intptr_t xref, int flags)
764{
765	struct intr_pic *pic;
766
767	mtx_lock(&pic_list_lock);
768	pic = pic_lookup_locked(dev, xref, flags);
769	if (pic != NULL) {
770		mtx_unlock(&pic_list_lock);
771		return (pic);
772	}
773	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
774	if (pic == NULL) {
775		mtx_unlock(&pic_list_lock);
776		return (NULL);
777	}
778	pic->pic_xref = xref;
779	pic->pic_dev = dev;
780	pic->pic_flags = flags;
781	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
782	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
783	mtx_unlock(&pic_list_lock);
784
785	return (pic);
786}
#ifdef notyet
/*
 *  Destroy interrupt controller.
 *
 *  The controller is looked up and unlinked from pic_list under the list
 *  lock, then torn down after the lock is dropped.  Nothing happens when
 *  no controller matches.
 */
static void
pic_destroy(device_t dev, intptr_t xref, int flags)
{
	struct intr_pic *pic;

	mtx_lock(&pic_list_lock);
	pic = pic_lookup_locked(dev, xref, flags);
	if (pic == NULL) {
		mtx_unlock(&pic_list_lock);
		return;
	}
	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
	mtx_unlock(&pic_list_lock);

	/*
	 * pic_create() initialized the child lock; it has to be destroyed
	 * before the memory that holds it is freed.
	 */
	mtx_destroy(&pic->pic_child_lock);
	free(pic, M_INTRNG);
}
#endif
808/*
809 *  Register interrupt controller.
810 */
811struct intr_pic *
812intr_pic_register(device_t dev, intptr_t xref)
813{
814	struct intr_pic *pic;
815
816	if (dev == NULL)
817		return (NULL);
818	pic = pic_create(dev, xref, FLAG_PIC);
819	if (pic == NULL)
820		return (NULL);
821
822	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
823	    device_get_nameunit(dev), dev, (uintmax_t)xref);
824	return (pic);
825}
826
827/*
828 *  Unregister interrupt controller.
829 */
830int
831intr_pic_deregister(device_t dev, intptr_t xref)
832{
833
834	panic("%s: not implemented", __func__);
835}
836
837/*
838 *  Mark interrupt controller (itself) as a root one.
839 *
840 *  Note that only an interrupt controller can really know its position
841 *  in interrupt controller's tree. So root PIC must claim itself as a root.
842 *
843 *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
844 *  page 30:
845 *    "The root of the interrupt tree is determined when traversal
846 *     of the interrupt tree reaches an interrupt controller node without
847 *     an interrupts property and thus no explicit interrupt parent."
848 */
849int
850intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
851    void *arg, u_int ipicount)
852{
853	struct intr_pic *pic;
854
855	pic = pic_lookup(dev, xref, FLAG_PIC);
856	if (pic == NULL) {
857		device_printf(dev, "not registered\n");
858		return (EINVAL);
859	}
860
861	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
862	    ("%s: Found a non-PIC controller: %s", __func__,
863	     device_get_name(pic->pic_dev)));
864
865	if (filter == NULL) {
866		device_printf(dev, "filter missing\n");
867		return (EINVAL);
868	}
869
870	/*
871	 * Only one interrupt controllers could be on the root for now.
872	 * Note that we further suppose that there is not threaded interrupt
873	 * routine (handler) on the root. See intr_irq_handler().
874	 */
875	if (intr_irq_root_dev != NULL) {
876		device_printf(dev, "another root already set\n");
877		return (EBUSY);
878	}
879
880	intr_irq_root_dev = dev;
881	irq_root_filter = filter;
882	irq_root_arg = arg;
883	irq_root_ipicount = ipicount;
884
885	debugf("irq root set to %s\n", device_get_nameunit(dev));
886	return (0);
887}
888
889/*
890 * Add a handler to manage a sub range of a parents interrupts.
891 */
892struct intr_pic *
893intr_pic_add_handler(device_t parent, struct intr_pic *pic,
894    intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
895    uintptr_t length)
896{
897	struct intr_pic *parent_pic;
898	struct intr_pic_child *newchild;
899#ifdef INVARIANTS
900	struct intr_pic_child *child;
901#endif
902
903	/* Find the parent PIC */
904	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
905	if (parent_pic == NULL)
906		return (NULL);
907
908	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
909	newchild->pc_pic = pic;
910	newchild->pc_filter = filter;
911	newchild->pc_filter_arg = arg;
912	newchild->pc_start = start;
913	newchild->pc_length = length;
914
915	mtx_lock_spin(&parent_pic->pic_child_lock);
916#ifdef INVARIANTS
917	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
918		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
919		    __func__));
920	}
921#endif
922	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
923	mtx_unlock_spin(&parent_pic->pic_child_lock);
924
925	return (pic);
926}
927
928static int
929intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
930    struct intr_irqsrc **isrc)
931{
932	struct intr_pic *pic;
933	struct intr_map_data_msi *msi;
934
935	if (data == NULL)
936		return (EINVAL);
937
938	pic = pic_lookup(dev, xref,
939	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
940	if (pic == NULL)
941		return (ESRCH);
942
943	switch (data->type) {
944	case INTR_MAP_DATA_MSI:
945		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
946		    ("%s: Found a non-MSI controller: %s", __func__,
947		     device_get_name(pic->pic_dev)));
948		msi = (struct intr_map_data_msi *)data;
949		*isrc = msi->isrc;
950		return (0);
951
952	default:
953		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
954		    ("%s: Found a non-PIC controller: %s", __func__,
955		     device_get_name(pic->pic_dev)));
956		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
957	}
958}
959
960bool
961intr_is_per_cpu(struct resource *res)
962{
963	u_int res_id;
964	struct intr_irqsrc *isrc;
965
966	res_id = (u_int)rman_get_start(res);
967	isrc = intr_map_get_isrc(res_id);
968
969	if (isrc == NULL)
970		panic("Attempt to get isrc for non-active resource id: %u\n",
971		    res_id);
972	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
973}
974
975int
976intr_activate_irq(device_t dev, struct resource *res)
977{
978	device_t map_dev;
979	intptr_t map_xref;
980	struct intr_map_data *data;
981	struct intr_irqsrc *isrc;
982	u_int res_id;
983	int error;
984
985	KASSERT(rman_get_start(res) == rman_get_end(res),
986	    ("%s: more interrupts in resource", __func__));
987
988	res_id = (u_int)rman_get_start(res);
989	if (intr_map_get_isrc(res_id) != NULL)
990		panic("Attempt to double activation of resource id: %u\n",
991		    res_id);
992	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
993	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
994	if (error != 0) {
995		free(data, M_INTRNG);
996		/* XXX TODO DISCONECTED PICs */
997		/* if (error == EINVAL) return(0); */
998		return (error);
999	}
1000	intr_map_set_isrc(res_id, isrc);
1001	rman_set_virtual(res, data);
1002	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1003}
1004
1005int
1006intr_deactivate_irq(device_t dev, struct resource *res)
1007{
1008	struct intr_map_data *data;
1009	struct intr_irqsrc *isrc;
1010	u_int res_id;
1011	int error;
1012
1013	KASSERT(rman_get_start(res) == rman_get_end(res),
1014	    ("%s: more interrupts in resource", __func__));
1015
1016	res_id = (u_int)rman_get_start(res);
1017	isrc = intr_map_get_isrc(res_id);
1018	if (isrc == NULL)
1019		panic("Attempt to deactivate non-active resource id: %u\n",
1020		    res_id);
1021
1022	data = rman_get_virtual(res);
1023	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1024	intr_map_set_isrc(res_id, NULL);
1025	rman_set_virtual(res, NULL);
1026	free(data, M_INTRNG);
1027	return (error);
1028}
1029
1030int
1031intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1032    driver_intr_t hand, void *arg, int flags, void **cookiep)
1033{
1034	int error;
1035	struct intr_map_data *data;
1036	struct intr_irqsrc *isrc;
1037	const char *name;
1038	u_int res_id;
1039
1040	KASSERT(rman_get_start(res) == rman_get_end(res),
1041	    ("%s: more interrupts in resource", __func__));
1042
1043	res_id = (u_int)rman_get_start(res);
1044	isrc = intr_map_get_isrc(res_id);
1045	if (isrc == NULL) {
1046		/* XXX TODO DISCONECTED PICs */
1047		return (EINVAL);
1048	}
1049
1050	data = rman_get_virtual(res);
1051	name = device_get_nameunit(dev);
1052
1053#ifdef INTR_SOLO
1054	/*
1055	 * Standard handling is done through MI interrupt framework. However,
1056	 * some interrupts could request solely own special handling. This
1057	 * non standard handling can be used for interrupt controllers without
1058	 * handler (filter only), so in case that interrupt controllers are
1059	 * chained, MI interrupt framework is called only in leaf controller.
1060	 *
1061	 * Note that root interrupt controller routine is served as well,
1062	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1063	 */
1064	if (flags & INTR_SOLO && hand != NULL) {
1065		debugf("irq %u cannot solo on %s\n", irq, name);
1066		return (EINVAL);
1067	}
1068
1069	if (flags & INTR_SOLO) {
1070		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1071		    arg, cookiep);
1072		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1073		    name);
1074	} else
1075#endif
1076		{
1077		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1078		    cookiep);
1079		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1080	}
1081	if (error != 0)
1082		return (error);
1083
1084	mtx_lock(&isrc_table_lock);
1085	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1086	if (error == 0) {
1087		isrc->isrc_handlers++;
1088		if (isrc->isrc_handlers == 1)
1089			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1090	}
1091	mtx_unlock(&isrc_table_lock);
1092	if (error != 0)
1093		intr_event_remove_handler(*cookiep);
1094	return (error);
1095}
1096
1097int
1098intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1099{
1100	int error;
1101	struct intr_map_data *data;
1102	struct intr_irqsrc *isrc;
1103	u_int res_id;
1104
1105	KASSERT(rman_get_start(res) == rman_get_end(res),
1106	    ("%s: more interrupts in resource", __func__));
1107
1108	res_id = (u_int)rman_get_start(res);
1109	isrc = intr_map_get_isrc(res_id);
1110	if (isrc == NULL || isrc->isrc_handlers == 0)
1111		return (EINVAL);
1112
1113	data = rman_get_virtual(res);
1114
1115#ifdef INTR_SOLO
1116	if (isrc->isrc_filter != NULL) {
1117		if (isrc != cookie)
1118			return (EINVAL);
1119
1120		mtx_lock(&isrc_table_lock);
1121		isrc->isrc_filter = NULL;
1122		isrc->isrc_arg = NULL;
1123		isrc->isrc_handlers = 0;
1124		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1125		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1126		isrc_update_name(isrc, NULL);
1127		mtx_unlock(&isrc_table_lock);
1128		return (0);
1129	}
1130#endif
1131	if (isrc != intr_handler_source(cookie))
1132		return (EINVAL);
1133
1134	error = intr_event_remove_handler(cookie);
1135	if (error == 0) {
1136		mtx_lock(&isrc_table_lock);
1137		isrc->isrc_handlers--;
1138		if (isrc->isrc_handlers == 0)
1139			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1140		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1141		intrcnt_updatename(isrc);
1142		mtx_unlock(&isrc_table_lock);
1143	}
1144	return (error);
1145}
1146
1147int
1148intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1149    const char *descr)
1150{
1151	int error;
1152	struct intr_irqsrc *isrc;
1153	u_int res_id;
1154
1155	KASSERT(rman_get_start(res) == rman_get_end(res),
1156	    ("%s: more interrupts in resource", __func__));
1157
1158	res_id = (u_int)rman_get_start(res);
1159	isrc = intr_map_get_isrc(res_id);
1160	if (isrc == NULL || isrc->isrc_handlers == 0)
1161		return (EINVAL);
1162#ifdef INTR_SOLO
1163	if (isrc->isrc_filter != NULL) {
1164		if (isrc != cookie)
1165			return (EINVAL);
1166
1167		mtx_lock(&isrc_table_lock);
1168		isrc_update_name(isrc, descr);
1169		mtx_unlock(&isrc_table_lock);
1170		return (0);
1171	}
1172#endif
1173	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1174	if (error == 0) {
1175		mtx_lock(&isrc_table_lock);
1176		intrcnt_updatename(isrc);
1177		mtx_unlock(&isrc_table_lock);
1178	}
1179	return (error);
1180}
1181
1182#ifdef SMP
1183int
1184intr_bind_irq(device_t dev, struct resource *res, int cpu)
1185{
1186	struct intr_irqsrc *isrc;
1187	u_int res_id;
1188
1189	KASSERT(rman_get_start(res) == rman_get_end(res),
1190	    ("%s: more interrupts in resource", __func__));
1191
1192	res_id = (u_int)rman_get_start(res);
1193	isrc = intr_map_get_isrc(res_id);
1194	if (isrc == NULL || isrc->isrc_handlers == 0)
1195		return (EINVAL);
1196#ifdef INTR_SOLO
1197	if (isrc->isrc_filter != NULL)
1198		return (intr_isrc_assign_cpu(isrc, cpu));
1199#endif
1200	return (intr_event_bind(isrc->isrc_event, cpu));
1201}
1202
1203/*
1204 * Return the CPU that the next interrupt source should use.
1205 * For now just returns the next CPU according to round-robin.
1206 */
1207u_int
1208intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1209{
1210	u_int cpu;
1211
1212	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1213	if (!irq_assign_cpu || mp_ncpus == 1) {
1214		cpu = PCPU_GET(cpuid);
1215
1216		if (CPU_ISSET(cpu, cpumask))
1217			return (curcpu);
1218
1219		return (CPU_FFS(cpumask) - 1);
1220	}
1221
1222	do {
1223		last_cpu++;
1224		if (last_cpu > mp_maxid)
1225			last_cpu = 0;
1226	} while (!CPU_ISSET(last_cpu, cpumask));
1227	return (last_cpu);
1228}
1229
1230#ifndef EARLY_AP_STARTUP
1231/*
1232 *  Distribute all the interrupt sources among the available
1233 *  CPUs once the AP's have been launched.
1234 */
1235static void
1236intr_irq_shuffle(void *arg __unused)
1237{
1238	struct intr_irqsrc *isrc;
1239	u_int i;
1240
1241	if (mp_ncpus == 1)
1242		return;
1243
1244	mtx_lock(&isrc_table_lock);
1245	irq_assign_cpu = true;
1246	for (i = 0; i < intr_nirq; i++) {
1247		isrc = irq_sources[i];
1248		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1249		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1250			continue;
1251
1252		if (isrc->isrc_event != NULL &&
1253		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1254		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1255			panic("%s: CPU inconsistency", __func__);
1256
1257		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1258			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1259
1260		/*
1261		 * We are in wicked position here if the following call fails
1262		 * for bound ISRC. The best thing we can do is to clear
1263		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1264		 */
1265		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1266			CPU_ZERO(&isrc->isrc_cpu);
1267	}
1268	mtx_unlock(&isrc_table_lock);
1269}
1270SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1271#endif /* !EARLY_AP_STARTUP */
1272
1273#else
1274u_int
1275intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1276{
1277
1278	return (PCPU_GET(cpuid));
1279}
1280#endif /* SMP */
1281
1282/*
1283 * Allocate memory for new intr_map_data structure.
1284 * Initialize common fields.
1285 */
1286struct intr_map_data *
1287intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1288{
1289	struct intr_map_data *data;
1290
1291	data = malloc(len, M_INTRNG, flags);
1292	data->type = type;
1293	data->len = len;
1294	return (data);
1295}
1296
1297void intr_free_intr_map_data(struct intr_map_data *data)
1298{
1299
1300	free(data, M_INTRNG);
1301}
1302
1303/*
1304 *  Register a MSI/MSI-X interrupt controller
1305 */
1306int
1307intr_msi_register(device_t dev, intptr_t xref)
1308{
1309	struct intr_pic *pic;
1310
1311	if (dev == NULL)
1312		return (EINVAL);
1313	pic = pic_create(dev, xref, FLAG_MSI);
1314	if (pic == NULL)
1315		return (ENOMEM);
1316
1317	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1318	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1319	return (0);
1320}
1321
1322int
1323intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1324    int maxcount, int *irqs)
1325{
1326	struct iommu_domain *domain;
1327	struct intr_irqsrc **isrc;
1328	struct intr_pic *pic;
1329	device_t pdev;
1330	struct intr_map_data_msi *msi;
1331	int err, i;
1332
1333	pic = pic_lookup(NULL, xref, FLAG_MSI);
1334	if (pic == NULL)
1335		return (ESRCH);
1336
1337	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1338	    ("%s: Found a non-MSI controller: %s", __func__,
1339	     device_get_name(pic->pic_dev)));
1340
1341	/*
1342	 * If this is the first time we have used this context ask the
1343	 * interrupt controller to map memory the msi source will need.
1344	 */
1345	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1346	if (err != 0)
1347		return (err);
1348
1349	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1350	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1351	if (err != 0) {
1352		free(isrc, M_INTRNG);
1353		return (err);
1354	}
1355
1356	for (i = 0; i < count; i++) {
1357		isrc[i]->isrc_iommu = domain;
1358		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1359		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1360		msi-> isrc = isrc[i];
1361
1362		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1363		    (struct intr_map_data *)msi);
1364	}
1365	free(isrc, M_INTRNG);
1366
1367	return (err);
1368}
1369
1370int
1371intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1372    int *irqs)
1373{
1374	struct intr_irqsrc **isrc;
1375	struct intr_pic *pic;
1376	struct intr_map_data_msi *msi;
1377	int i, err;
1378
1379	pic = pic_lookup(NULL, xref, FLAG_MSI);
1380	if (pic == NULL)
1381		return (ESRCH);
1382
1383	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1384	    ("%s: Found a non-MSI controller: %s", __func__,
1385	     device_get_name(pic->pic_dev)));
1386
1387	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1388
1389	for (i = 0; i < count; i++) {
1390		msi = (struct intr_map_data_msi *)
1391		    intr_map_get_map_data(irqs[i]);
1392		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1393		    ("%s: irq %d map data is not MSI", __func__,
1394		    irqs[i]));
1395		isrc[i] = msi->isrc;
1396	}
1397
1398	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1399
1400	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1401
1402	for (i = 0; i < count; i++) {
1403		if (isrc[i] != NULL)
1404			intr_unmap_irq(irqs[i]);
1405	}
1406
1407	free(isrc, M_INTRNG);
1408	return (err);
1409}
1410
1411int
1412intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1413{
1414	struct iommu_domain *domain;
1415	struct intr_irqsrc *isrc;
1416	struct intr_pic *pic;
1417	device_t pdev;
1418	struct intr_map_data_msi *msi;
1419	int err;
1420
1421	pic = pic_lookup(NULL, xref, FLAG_MSI);
1422	if (pic == NULL)
1423		return (ESRCH);
1424
1425	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1426	    ("%s: Found a non-MSI controller: %s", __func__,
1427	     device_get_name(pic->pic_dev)));
1428
1429	/*
1430	 * If this is the first time we have used this context ask the
1431	 * interrupt controller to map memory the msi source will need.
1432	 */
1433	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1434	if (err != 0)
1435		return (err);
1436
1437	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1438	if (err != 0)
1439		return (err);
1440
1441	isrc->isrc_iommu = domain;
1442	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1443		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1444	msi->isrc = isrc;
1445	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1446	return (0);
1447}
1448
1449int
1450intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1451{
1452	struct intr_irqsrc *isrc;
1453	struct intr_pic *pic;
1454	struct intr_map_data_msi *msi;
1455	int err;
1456
1457	pic = pic_lookup(NULL, xref, FLAG_MSI);
1458	if (pic == NULL)
1459		return (ESRCH);
1460
1461	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1462	    ("%s: Found a non-MSI controller: %s", __func__,
1463	     device_get_name(pic->pic_dev)));
1464
1465	msi = (struct intr_map_data_msi *)
1466	    intr_map_get_map_data(irq);
1467	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1468	    ("%s: irq %d map data is not MSI", __func__,
1469	    irq));
1470	isrc = msi->isrc;
1471	if (isrc == NULL) {
1472		intr_unmap_irq(irq);
1473		return (EINVAL);
1474	}
1475
1476	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1477
1478	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1479	intr_unmap_irq(irq);
1480
1481	return (err);
1482}
1483
1484int
1485intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1486    uint64_t *addr, uint32_t *data)
1487{
1488	struct intr_irqsrc *isrc;
1489	struct intr_pic *pic;
1490	int err;
1491
1492	pic = pic_lookup(NULL, xref, FLAG_MSI);
1493	if (pic == NULL)
1494		return (ESRCH);
1495
1496	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1497	    ("%s: Found a non-MSI controller: %s", __func__,
1498	     device_get_name(pic->pic_dev)));
1499
1500	isrc = intr_map_get_isrc(irq);
1501	if (isrc == NULL)
1502		return (EINVAL);
1503
1504	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1505
1506#ifdef IOMMU
1507	if (isrc->isrc_iommu != NULL)
1508		iommu_translate_msi(isrc->isrc_iommu, addr);
1509#endif
1510
1511	return (err);
1512}
1513
/*
 * dosoftints() is declared and defined here with an empty body; calling it
 * performs no work.
 */
void dosoftints(void);

void
dosoftints(void)
{

}
1519
1520#ifdef SMP
1521/*
1522 *  Init interrupt controller on another CPU.
1523 */
1524void
1525intr_pic_init_secondary(void)
1526{
1527
1528	/*
1529	 * QQQ: Only root PIC is aware of other CPUs ???
1530	 */
1531	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1532
1533	//mtx_lock(&isrc_table_lock);
1534	PIC_INIT_SECONDARY(intr_irq_root_dev);
1535	//mtx_unlock(&isrc_table_lock);
1536}
1537#endif
1538
1539#ifdef DDB
1540DB_SHOW_COMMAND(irqs, db_show_irqs)
1541{
1542	u_int i, irqsum;
1543	u_long num;
1544	struct intr_irqsrc *isrc;
1545
1546	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1547		isrc = irq_sources[i];
1548		if (isrc == NULL)
1549			continue;
1550
1551		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1552		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1553		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1554		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1555		irqsum += num;
1556	}
1557	db_printf("irq total %u\n", irqsum);
1558}
1559#endif
1560
1561/*
1562 * Interrupt mapping table functions.
1563 *
1564 * Please, keep this part separately, it can be transformed to
1565 * extension of standard resources.
1566 */
/*
 * One slot of the irq_map table.  Filled in by intr_map_irq(): dev, xref
 * and map_data come from its arguments, isrc starts out NULL and is set
 * later through intr_map_set_isrc().
 */
struct intr_map_entry
{
	device_t 		dev;		/* device passed to intr_map_irq() */
	intptr_t 		xref;		/* xref passed to intr_map_irq() */
	struct intr_map_data 	*map_data;	/* freed by intr_unmap_irq() */
	struct intr_irqsrc 	*isrc;		/* interrupt source, or NULL */
	/* XXX TODO DISCONNECTED PICs */
	/*int			flags */
};
1576
/* XXX Convert irq_map[] to dynamically expandable one. */
/* Table of entry pointers indexed by resource id; NULL marks a free slot. */
static struct intr_map_entry **irq_map;
/* Number of slots in irq_map[]. */
static u_int irq_map_count;
/* Index at which intr_map_irq() starts its search for a free slot. */
static u_int irq_map_first_free_idx;
/* Mutex held around accesses to irq_map[] and the index above. */
static struct mtx irq_map_lock;
1582
1583static struct intr_irqsrc *
1584intr_map_get_isrc(u_int res_id)
1585{
1586	struct intr_irqsrc *isrc;
1587
1588	isrc = NULL;
1589	mtx_lock(&irq_map_lock);
1590	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1591		isrc = irq_map[res_id]->isrc;
1592	mtx_unlock(&irq_map_lock);
1593
1594	return (isrc);
1595}
1596
1597static void
1598intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1599{
1600
1601	mtx_lock(&irq_map_lock);
1602	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1603		irq_map[res_id]->isrc = isrc;
1604	mtx_unlock(&irq_map_lock);
1605}
1606
1607/*
1608 * Get a copy of intr_map_entry data
1609 */
1610static struct intr_map_data *
1611intr_map_get_map_data(u_int res_id)
1612{
1613	struct intr_map_data *data;
1614
1615	data = NULL;
1616	mtx_lock(&irq_map_lock);
1617	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1618		panic("Attempt to copy invalid resource id: %u\n", res_id);
1619	data = irq_map[res_id]->map_data;
1620	mtx_unlock(&irq_map_lock);
1621
1622	return (data);
1623}
1624
1625/*
1626 * Get a copy of intr_map_entry data
1627 */
1628static void
1629intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1630    struct intr_map_data **data)
1631{
1632	size_t len;
1633
1634	len = 0;
1635	mtx_lock(&irq_map_lock);
1636	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1637		panic("Attempt to copy invalid resource id: %u\n", res_id);
1638	if (irq_map[res_id]->map_data != NULL)
1639		len = irq_map[res_id]->map_data->len;
1640	mtx_unlock(&irq_map_lock);
1641
1642	if (len == 0)
1643		*data = NULL;
1644	else
1645		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1646	mtx_lock(&irq_map_lock);
1647	if (irq_map[res_id] == NULL)
1648		panic("Attempt to copy invalid resource id: %u\n", res_id);
1649	if (len != 0) {
1650		if (len != irq_map[res_id]->map_data->len)
1651			panic("Resource id: %u has changed.\n", res_id);
1652		memcpy(*data, irq_map[res_id]->map_data, len);
1653	}
1654	*map_dev = irq_map[res_id]->dev;
1655	*map_xref = irq_map[res_id]->xref;
1656	mtx_unlock(&irq_map_lock);
1657}
1658
1659/*
1660 * Allocate and fill new entry in irq_map table.
1661 */
1662u_int
1663intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1664{
1665	u_int i;
1666	struct intr_map_entry *entry;
1667
1668	/* Prepare new entry first. */
1669	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1670
1671	entry->dev = dev;
1672	entry->xref = xref;
1673	entry->map_data = data;
1674	entry->isrc = NULL;
1675
1676	mtx_lock(&irq_map_lock);
1677	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1678		if (irq_map[i] == NULL) {
1679			irq_map[i] = entry;
1680			irq_map_first_free_idx = i + 1;
1681			mtx_unlock(&irq_map_lock);
1682			return (i);
1683		}
1684	}
1685	mtx_unlock(&irq_map_lock);
1686
1687	/* XXX Expand irq_map table */
1688	panic("IRQ mapping table is full.");
1689}
1690
1691/*
1692 * Remove and free mapping entry.
1693 */
1694void
1695intr_unmap_irq(u_int res_id)
1696{
1697	struct intr_map_entry *entry;
1698
1699	mtx_lock(&irq_map_lock);
1700	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1701		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1702	entry = irq_map[res_id];
1703	irq_map[res_id] = NULL;
1704	irq_map_first_free_idx = res_id;
1705	mtx_unlock(&irq_map_lock);
1706	intr_free_intr_map_data(entry->map_data);
1707	free(entry, M_INTRNG);
1708}
1709
1710/*
1711 * Clone mapping entry.
1712 */
1713u_int
1714intr_map_clone_irq(u_int old_res_id)
1715{
1716	device_t map_dev;
1717	intptr_t map_xref;
1718	struct intr_map_data *data;
1719
1720	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1721	return (intr_map_irq(map_dev, map_xref, data));
1722}
1723
1724static void
1725intr_map_init(void *dummy __unused)
1726{
1727
1728	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1729
1730	irq_map_count = 2 * intr_nirq;
1731	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1732	    M_INTRNG, M_WAITOK | M_ZERO);
1733}
1734SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1735